From d8616ee2affcff37c5d315310da557a694a3303d Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 24 May 2022 15:53:11 +0800 Subject: bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues During TCP sockmap redirect pressure test, the following warning is triggered: WARNING: CPU: 3 PID: 2145 at net/core/stream.c:205 sk_stream_kill_queues+0xbc/0xd0 CPU: 3 PID: 2145 Comm: iperf Kdump: loaded Tainted: G W 5.10.0+ #9 Call Trace: inet_csk_destroy_sock+0x55/0x110 inet_csk_listen_stop+0xbb/0x380 tcp_close+0x41b/0x480 inet_release+0x42/0x80 __sock_release+0x3d/0xa0 sock_close+0x11/0x20 __fput+0x9d/0x240 task_work_run+0x62/0x90 exit_to_user_mode_prepare+0x110/0x120 syscall_exit_to_user_mode+0x27/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The reason we observed is that: When the listener is closing, a connection may have completed the three-way handshake but not accepted, and the client has sent some packets. The child sks in accept queue release by inet_child_forget()->inet_csk_destroy_sock(), but psocks of child sks have not released. To fix, add sock_map_destroy to release psocks. Signed-off-by: Wang Yufen Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220524075311.649153-1-wangyufen@huawei.com --- include/linux/bpf.h | 1 + include/linux/skmsg.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b914a56a2c5..8e6092d0ea95 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2104,6 +2104,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); void sock_map_unhash(struct sock *sk); +void sock_map_destroy(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c5a2d6f50f25..153b6dec9b6a 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -95,6 +95,7 @@ struct sk_psock { spinlock_t link_lock; refcount_t refcnt; void (*saved_unhash)(struct sock *sk); + void (*saved_destroy)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); -- cgit v1.2.3 From ff8372a467fa28752610f57a6512c5365413faa2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 6 Jun 2022 10:24:34 +0800 Subject: net: skb: move enum skb_drop_reason to standalone header file As the skb drop reasons are getting more and more, move the enum 'skb_drop_reason' and related function to the standalone header 'dropreason.h', as Jakub Kicinski suggested. Signed-off-by: Menglong Dong Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 179 +-------------------------------------------- include/net/dropreason.h | 184 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+), 178 deletions(-) create mode 100644 include/net/dropreason.h (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d3d10556f0fa..82edf0359ab3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -43,6 +43,7 @@ #include #endif #include +#include /** * DOC: skb checksums @@ -337,184 +338,6 @@ struct sk_buff_head { struct sk_buff; -/* The reason of skb drop, which is used in kfree_skb_reason(). - * en...maybe they should be splited by group? - * - * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is - * used to translate the reason to string. - */ -enum skb_drop_reason { - SKB_NOT_DROPPED_YET = 0, - SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ - SKB_DROP_REASON_NO_SOCKET, /* socket not found */ - SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ - SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ - SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ - SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ - SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ - SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current - * host (interface is in promisc - * mode) - */ - SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ - SKB_DROP_REASON_IP_INHDR, /* there is something wrong with - * IP header (see - * IPSTATS_MIB_INHDRERRORS) - */ - SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. - * see the document for rp_filter - * in ip-sysctl.rst for more - * information - */ - SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 - * is multicast, but L3 is - * unicast. - */ - SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ - SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ - SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ - SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as - * udp packet drop out of - * udp_memory_allocated. - */ - SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one - * expected, corresponding - * to LINUX_MIB_TCPMD5NOTFOUND - */ - SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not - * expecting one, corresponding - * to LINUX_MIB_TCPMD5UNEXPECTED - */ - SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, - * corresponding to - * LINUX_MIB_TCPMD5FAILURE - */ - SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket - * backlog (see - * LINUX_MIB_TCPBACKLOGDROP) - */ - SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ - SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, - * see LINUX_MIB_TCPZEROWINDOWDROP - */ - SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already - * received before (spurious retrans - * may happened), see - * LINUX_MIB_DELAYEDACKLOST - */ - SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, - * the seq of the first byte exceed - * the right edges of receive - * window - */ - SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in - * the ofo queue, corresponding to - * LINUX_MIB_TCPOFOMERGE - */ - SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED - */ - SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ - SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ - SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ - SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ - SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ - SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ - SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ - SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ - SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ - SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ - SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by - * BPF_PROG_TYPE_CGROUP_SKB - * eBPF program - */ - SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ - SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh - * entry - */ - SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ - SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh - * entry is full - */ - SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ - SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ - SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet - * outputting (failed to enqueue to - * current qdisc) - */ - SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to - * the per CPU backlog queue. This - * can be caused by backlog queue - * full (see netdev_max_backlog in - * net.rst) or RPS flow limit - */ - SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ - SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented - * or not supported - */ - SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation - * error - */ - SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ - SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from - * user space, e.g., via - * zerocopy_sg_from_iter() - * or skb_orphan_frags_rx() - */ - SKB_DROP_REASON_DEV_HDR, /* device driver specific - * header/metadata is invalid - */ - /* the device is not ready to xmit/recv due to any of its data - * structure that is not up/ready/initialized, e.g., the IFF_UP is - * not set, or driver specific tun->tfiles[txq] is not initialized - */ - SKB_DROP_REASON_DEV_READY, - SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ - SKB_DROP_REASON_NOMEM, /* error due to OOM */ - SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header - * from networking data, e.g., failed - * to pull the protocol header from - * frags via pskb_may_pull() - */ - SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly - * attached to tun/tap, e.g., via - * TUNSETFILTEREBPF - */ - SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented - * at tun/tap, e.g., check_filter() - */ - SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ - SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC - * 2211, such as a broadcasts - * ICMP_TIMESTAMP - */ - SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed - * the MTU) - */ - SKB_DROP_REASON_MAX, -}; - -#define SKB_DR_INIT(name, reason) \ - enum skb_drop_reason name = SKB_DROP_REASON_##reason -#define SKB_DR(name) \ - SKB_DR_INIT(name, NOT_SPECIFIED) -#define SKB_DR_SET(name, reason) \ - (name = SKB_DROP_REASON_##reason) -#define SKB_DR_OR(name, reason) \ - do { \ - if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ - name == SKB_NOT_DROPPED_YET) \ - SKB_DR_SET(name, reason); \ - } while (0) - /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. diff --git a/include/net/dropreason.h b/include/net/dropreason.h new file mode 100644 index 000000000000..ecd18b7b1364 --- /dev/null +++ b/include/net/dropreason.h @@ -0,0 +1,184 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_DROPREASON_H +#define _LINUX_DROPREASON_H + +/* The reason of skb drop, which is used in kfree_skb_reason(). + * en...maybe they should be splited by group? + * + * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is + * used to translate the reason to string. + */ +enum skb_drop_reason { + SKB_NOT_DROPPED_YET = 0, + SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ + SKB_DROP_REASON_NO_SOCKET, /* socket not found */ + SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ + SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ + SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ + SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ + SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ + SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current + * host (interface is in promisc + * mode) + */ + SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ + SKB_DROP_REASON_IP_INHDR, /* there is something wrong with + * IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. + * see the document for rp_filter + * in ip-sysctl.rst for more + * information + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 + * is multicast, but L3 is + * unicast. + */ + SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ + SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ + SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ + SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as + * udp packet drop out of + * udp_memory_allocated. + */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one + * expected, corresponding + * to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not + * expecting one, corresponding + * to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, + * corresponding to + * LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket + * backlog (see + * LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ + SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already + * received before (spurious retrans + * may happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, + * the seq of the first byte exceed + * the right edges of receive + * window + */ + SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in + * the ofo queue, corresponding to + * LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ + SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ + SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ + SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by + * BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh + * entry + */ + SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh + * entry is full + */ + SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ + SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ + SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet + * outputting (failed to enqueue to + * current qdisc) + */ + SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to + * the per CPU backlog queue. This + * can be caused by backlog queue + * full (see netdev_max_backlog in + * net.rst) or RPS flow limit + */ + SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ + SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ + SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented + * or not supported + */ + SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation + * error + */ + SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from + * user space, e.g., via + * zerocopy_sg_from_iter() + * or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_DEV_HDR, /* device driver specific + * header/metadata is invalid + */ + /* the device is not ready to xmit/recv due to any of its data + * structure that is not up/ready/initialized, e.g., the IFF_UP is + * not set, or driver specific tun->tfiles[txq] is not initialized + */ + SKB_DROP_REASON_DEV_READY, + SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ + SKB_DROP_REASON_NOMEM, /* error due to OOM */ + SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header + * from networking data, e.g., failed + * to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly + * attached to tun/tap, e.g., via + * TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented + * at tun/tap, e.g., check_filter() + */ + SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ + SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC + * 2211, such as a broadcasts + * ICMP_TIMESTAMP + */ + SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed + * the MTU) + */ + SKB_DROP_REASON_MAX, +}; + +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ + name == SKB_NOT_DROPPED_YET) \ + SKB_DR_SET(name, reason); \ + } while (0) + +#endif -- cgit v1.2.3 From 6089fb325cf737eeb2c4d236c94697112ca860da Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:26:00 -0700 Subject: bpf: Add btf enum64 support Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM. But in kernel, some enum indeed has 64bit values, e.g., in uapi bpf.h, we have enum { BPF_F_INDEX_MASK = 0xffffffffULL, BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, BPF_F_CTXLEN_MASK = (0xfffffULL << 32), }; In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK as 0, which certainly is incorrect. This patch added a new btf kind, BTF_KIND_ENUM64, which permits 64bit value to cover the above use case. The BTF_KIND_ENUM64 has the following three fields followed by the common type: struct bpf_enum64 { __u32 nume_off; __u32 val_lo32; __u32 val_hi32; }; Currently, btf type section has an alignment of 4 as all element types are u32. Representing the value with __u64 will introduce a pad for bpf_enum64 and may also introduce misalignment for the 64bit value. Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues. The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64 to indicate whether the value is signed or unsigned. The kflag intends to provide consistent output of BTF C fortmat with the original source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff. The format C has two choices, printing out 0xffffffff or -1 and current libbpf prints out as unsigned value. But if the signedness is preserved in btf, the value can be printed the same as the original source code. The kflag value 0 means unsigned values, which is consistent to the default by libbpf and should also cover most cases as well. The new BTF_KIND_ENUM64 is intended to support the enum value represented as 64bit value. But it can represent all BTF_KIND_ENUM values as well. The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has to be represented with 64 bits. In addition, a static inline function btf_kind_core_compat() is introduced which will be used later when libbpf relo_core.c changed. Here the kernel shares the same relo_core.c with libbpf. [1] https://reviews.llvm.org/D124641 Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 28 ++++++++ include/uapi/linux/btf.h | 17 ++++- kernel/bpf/btf.c | 142 +++++++++++++++++++++++++++++++++++++---- kernel/bpf/verifier.c | 2 +- tools/include/uapi/linux/btf.h | 17 ++++- 5 files changed, 185 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 2611cea2c2b6..1bfed7fa0428 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -177,6 +177,19 @@ static inline bool btf_type_is_enum(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; } +static inline bool btf_is_any_enum(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM || + BTF_INFO_KIND(t->info) == BTF_KIND_ENUM64; +} + +static inline bool btf_kind_core_compat(const struct btf_type *t1, + const struct btf_type *t2) +{ + return BTF_INFO_KIND(t1->info) == BTF_INFO_KIND(t2->info) || + (btf_is_any_enum(t1) && btf_is_any_enum(t2)); +} + static inline bool str_is_empty(const char *s) { return !s || !s[0]; @@ -192,6 +205,16 @@ static inline bool btf_is_enum(const struct btf_type *t) return btf_kind(t) == BTF_KIND_ENUM; } +static inline bool btf_is_enum64(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_ENUM64; +} + +static inline u64 btf_enum64_value(const struct btf_enum64 *e) +{ + return ((u64)e->val_hi32 << 32) | e->val_lo32; +} + static inline bool btf_is_composite(const struct btf_type *t) { u16 kind = btf_kind(t); @@ -332,6 +355,11 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t) return (struct btf_enum *)(t + 1); } +static inline struct btf_enum64 *btf_enum64(const struct btf_type *t) +{ + return (struct btf_enum64 *)(t + 1); +} + static inline const struct btf_var_secinfo *btf_type_var_secinfo( const struct btf_type *t) { diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index a9162a6c0284..ec1798b6d3ff 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -36,10 +36,10 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, enum, fwd and enum64 */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, @@ -63,7 +63,7 @@ enum { BTF_KIND_ARRAY = 3, /* Array */ BTF_KIND_STRUCT = 4, /* Struct */ BTF_KIND_UNION = 5, /* Union */ - BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ BTF_KIND_FWD = 7, /* Forward */ BTF_KIND_TYPEDEF = 8, /* Typedef */ BTF_KIND_VOLATILE = 9, /* Volatile */ @@ -76,6 +76,7 @@ enum { BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -186,4 +187,14 @@ struct btf_decl_tag { __s32 component_idx; }; +/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". + * The exact number of btf_enum64 is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7bccaa4646e5..6c0d8480e15c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -309,6 +309,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; const char *btf_type_str(const struct btf_type *t) @@ -666,6 +667,7 @@ static bool btf_type_has_size(const struct btf_type *t) case BTF_KIND_ENUM: case BTF_KIND_DATASEC: case BTF_KIND_FLOAT: + case BTF_KIND_ENUM64: return true; } @@ -711,6 +713,11 @@ static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) return (const struct btf_decl_tag *)(t + 1); } +static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t) +{ + return (const struct btf_enum64 *)(t + 1); +} + static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) { return kind_ops[BTF_INFO_KIND(t->info)]; @@ -1019,6 +1026,7 @@ static const char *btf_show_name(struct btf_show *show) parens = "{"; break; case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: prefix = "enum"; break; default: @@ -1834,6 +1842,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type, case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_FLOAT: + case BTF_KIND_ENUM64: size = type->size; goto resolved; @@ -3670,6 +3679,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, { const struct btf_enum *enums = btf_type_enum(t); struct btf *btf = env->btf; + const char *fmt_str; u16 i, nr_enums; u32 meta_needed; @@ -3683,11 +3693,6 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (btf_type_kflag(t)) { - btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); - return -EINVAL; - } - if (t->size > 8 || !is_power_of_2(t->size)) { btf_verifier_log_type(env, t, "Unexpected size"); return -EINVAL; @@ -3718,7 +3723,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, if (env->log.level == BPF_LOG_KERNEL) continue; - btf_verifier_log(env, "\t%s val=%d\n", + fmt_str = btf_type_kflag(t) ? "\t%s val=%d\n" : "\t%s val=%u\n"; + btf_verifier_log(env, fmt_str, __btf_name_by_offset(btf, enums[i].name_off), enums[i].val); } @@ -3759,7 +3765,10 @@ static void btf_enum_show(const struct btf *btf, const struct btf_type *t, return; } - btf_show_type_value(show, "%d", v); + if (btf_type_kflag(t)) + btf_show_type_value(show, "%d", v); + else + btf_show_type_value(show, "%u", v); btf_show_end_type(show); } @@ -3772,6 +3781,109 @@ static struct btf_kind_operations enum_ops = { .show = btf_enum_show, }; +static s32 btf_enum64_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_enum64 *enums = btf_type_enum64(t); + struct btf *btf = env->btf; + const char *fmt_str; + u16 i, nr_enums; + u32 meta_needed; + + nr_enums = btf_type_vlen(t); + meta_needed = nr_enums * sizeof(*enums); + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (t->size > 8 || !is_power_of_2(t->size)) { + btf_verifier_log_type(env, t, "Unexpected size"); + return -EINVAL; + } + + /* enum type either no name or a valid one */ + if (t->name_off && + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + for (i = 0; i < nr_enums; i++) { + if (!btf_name_offset_valid(btf, enums[i].name_off)) { + btf_verifier_log(env, "\tInvalid name_offset:%u", + enums[i].name_off); + return -EINVAL; + } + + /* enum member must have a valid name */ + if (!enums[i].name_off || + !btf_name_valid_identifier(btf, enums[i].name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + if (env->log.level == BPF_LOG_KERNEL) + continue; + + fmt_str = btf_type_kflag(t) ? "\t%s val=%lld\n" : "\t%s val=%llu\n"; + btf_verifier_log(env, fmt_str, + __btf_name_by_offset(btf, enums[i].name_off), + btf_enum64_value(enums + i)); + } + + return meta_needed; +} + +static void btf_enum64_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) +{ + const struct btf_enum64 *enums = btf_type_enum64(t); + u32 i, nr_enums = btf_type_vlen(t); + void *safe_data; + s64 v; + + safe_data = btf_show_start_type(show, t, type_id, data); + if (!safe_data) + return; + + v = *(u64 *)safe_data; + + for (i = 0; i < nr_enums; i++) { + if (v != btf_enum64_value(enums + i)) + continue; + + btf_show_type_value(show, "%s", + __btf_name_by_offset(btf, + enums[i].name_off)); + + btf_show_end_type(show); + return; + } + + if (btf_type_kflag(t)) + btf_show_type_value(show, "%lld", v); + else + btf_show_type_value(show, "%llu", v); + btf_show_end_type(show); +} + +static struct btf_kind_operations enum64_ops = { + .check_meta = btf_enum64_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_enum_check_member, + .check_kflag_member = btf_enum_check_kflag_member, + .log_details = btf_enum_log, + .show = btf_enum64_show, +}; + static s32 btf_func_proto_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) @@ -4438,6 +4550,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = &float_ops, [BTF_KIND_DECL_TAG] = &decl_tag_ops, [BTF_KIND_TYPE_TAG] = &modifier_ops, + [BTF_KIND_ENUM64] = &enum64_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -5299,7 +5412,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_small_int(t) || btf_type_is_enum(t)) + if (btf_type_is_small_int(t) || btf_is_any_enum(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { @@ -5763,7 +5876,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id, if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); - if (btf_type_is_int(t) || btf_type_is_enum(t)) + if (btf_type_is_int(t) || btf_is_any_enum(t)) return t->size; *bad_type = t; return -EINVAL; @@ -5911,7 +6024,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log, * to context only. And only global functions can be replaced. * Hence type check only those types. */ - if (btf_type_is_int(t1) || btf_type_is_enum(t1)) + if (btf_type_is_int(t1) || btf_is_any_enum(t1)) continue; if (!btf_type_is_ptr(t1)) { bpf_log(log, @@ -6408,7 +6521,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (!btf_type_is_int(t) && !btf_type_is_enum(t)) { + if (!btf_type_is_int(t) && !btf_is_any_enum(t)) { bpf_log(log, "Global function %s() doesn't return scalar. Only those are supported.\n", tname); @@ -6423,7 +6536,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, t = btf_type_by_id(btf, args[i].type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_int(t) || btf_type_is_enum(t)) { + if (btf_type_is_int(t) || btf_is_any_enum(t)) { reg->type = SCALAR_VALUE; continue; } @@ -7335,6 +7448,7 @@ recur: case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_FWD: + case BTF_KIND_ENUM64: return 1; case BTF_KIND_INT: /* just reject deprecated bitfield-like integers; all other @@ -7387,10 +7501,10 @@ recur: * field-based relocations. This function assumes that root types were already * checked for name match. Beyond that initial root-level name check, names * are completely ignored. Compatibility rules are as follows: - * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but * kind should match for local and target types (i.e., STRUCT is not * compatible with UNION); - * - for ENUMs, the size is ignored; + * - for ENUMs/ENUM64s, the size is ignored; * - for INT, size and signedness are ignored; * - for ARRAY, dimensionality is ignored, element types are checked for * compatibility recursively; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index aedac2ac02b9..2d2872682278 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10901,7 +10901,7 @@ static int check_btf_func(struct bpf_verifier_env *env, goto err_free; ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); scalar_return = - btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type); + btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type); if (i && !scalar_return && env->subprog_info[i].has_ld_abs) { verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n"); goto err_free; diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index a9162a6c0284..ec1798b6d3ff 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -36,10 +36,10 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, enum, fwd and enum64 */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, @@ -63,7 +63,7 @@ enum { BTF_KIND_ARRAY = 3, /* Array */ BTF_KIND_STRUCT = 4, /* Struct */ BTF_KIND_UNION = 5, /* Union */ - BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ BTF_KIND_FWD = 7, /* Forward */ BTF_KIND_TYPEDEF = 8, /* Typedef */ BTF_KIND_VOLATILE = 9, /* Volatile */ @@ -76,6 +76,7 @@ enum { BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -186,4 +187,14 @@ struct btf_decl_tag { __s32 component_idx; }; +/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". + * The exact number of btf_enum64 is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ -- cgit v1.2.3 From d62607c3fe45911b2331fac073355a8c914bbde2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Jun 2022 21:39:55 -0700 Subject: net: rename reference+tracking helpers Netdev reference helpers have a dev_ prefix for historic reasons. Renaming the old helpers would be too much churn but we can rename the tracking ones which are relatively recent and should be the default for new code. Rename: dev_hold_track() -> netdev_hold() dev_put_track() -> netdev_put() dev_replace_track() -> netdev_ref_replace() Link: https://lore.kernel.org/r/20220608043955.919359-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/eql.c | 4 ++-- drivers/net/macsec.c | 4 ++-- drivers/net/macvlan.c | 4 ++-- drivers/net/netconsole.c | 2 +- drivers/net/vrf.c | 8 ++++---- include/linux/netdevice.h | 24 ++++++++++++------------ include/net/xfrm.h | 2 +- net/8021q/vlan_dev.c | 4 ++-- net/ax25/af_ax25.c | 7 ++++--- net/ax25/ax25_dev.c | 6 +++--- net/bridge/br_if.c | 10 +++++----- net/core/dev.c | 10 +++++----- net/core/dev_ioctl.c | 4 ++-- net/core/drop_monitor.c | 5 +++-- net/core/dst.c | 8 ++++---- net/core/failover.c | 4 ++-- net/core/link_watch.c | 2 +- net/core/neighbour.c | 18 +++++++++--------- net/core/net-sysfs.c | 8 ++++---- net/core/netpoll.c | 2 +- net/core/pktgen.c | 6 +++--- net/ethtool/ioctl.c | 4 ++-- net/ethtool/netlink.c | 6 +++--- net/ethtool/netlink.h | 2 +- net/ipv4/devinet.c | 4 ++-- net/ipv4/fib_semantics.c | 11 ++++++----- net/ipv4/ipmr.c | 2 +- net/ipv4/route.c | 7 +++---- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/addrconf_core.c | 2 +- net/ipv6/ip6_gre.c | 8 ++++---- net/ipv6/ip6_tunnel.c | 4 ++-- net/ipv6/ip6_vti.c | 4 ++-- net/ipv6/ip6mr.c | 2 +- net/ipv6/route.c | 10 +++++----- net/ipv6/sit.c | 4 ++-- net/ipv6/xfrm6_policy.c | 4 ++-- net/llc/af_llc.c | 2 +- net/openvswitch/vport-netdev.c | 6 +++--- net/packet/af_packet.c | 12 ++++++------ net/sched/act_mirred.c | 6 +++--- net/sched/sch_api.c | 2 +- net/sched/sch_generic.c | 11 ++++++----- net/smc/smc_pnet.c | 7 ++++--- net/switchdev/switchdev.c | 4 ++-- net/tipc/bearer.c | 4 ++-- net/xfrm/xfrm_device.c | 2 +- 48 files changed, 141 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/eql.c b/drivers/net/eql.c index 557ca8ff9dec..ca3e4700a813 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -225,7 +225,7 @@ static void eql_kill_one_slave(slave_queue_t *queue, slave_t *slave) list_del(&slave->list); queue->num_slaves--; slave->dev->flags &= ~IFF_SLAVE; - dev_put_track(slave->dev, &slave->dev_tracker); + netdev_put(slave->dev, &slave->dev_tracker); kfree(slave); } @@ -399,7 +399,7 @@ static int __eql_insert_slave(slave_queue_t *queue, slave_t *slave) if (duplicate_slave) eql_kill_one_slave(queue, duplicate_slave); - dev_hold_track(slave->dev, &slave->dev_tracker, GFP_ATOMIC); + netdev_hold(slave->dev, &slave->dev_tracker, GFP_ATOMIC); list_add(&slave->list, &queue->all_slaves); queue->num_slaves++; slave->dev->flags |= IFF_SLAVE; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 817577e713d7..815738c0e067 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3462,7 +3462,7 @@ static int macsec_dev_init(struct net_device *dev) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); /* Get macsec's reference to real_dev */ - dev_hold_track(real_dev, &macsec->dev_tracker, GFP_KERNEL); + netdev_hold(real_dev, &macsec->dev_tracker, GFP_KERNEL); return 0; } @@ -3710,7 +3710,7 @@ static void macsec_free_netdev(struct net_device *dev) free_percpu(macsec->secy.tx_sc.stats); /* Get rid of the macsec's reference to real_dev */ - dev_put_track(macsec->real_dev, &macsec->dev_tracker); + netdev_put(macsec->real_dev, &macsec->dev_tracker); } static void macsec_setup(struct net_device *dev) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index eff75beb1395..5b46a6526fc6 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -915,7 +915,7 @@ static int macvlan_init(struct net_device *dev) port->count += 1; /* Get macvlan's reference to lowerdev */ - dev_hold_track(lowerdev, &vlan->dev_tracker, GFP_KERNEL); + netdev_hold(lowerdev, &vlan->dev_tracker, GFP_KERNEL); return 0; } @@ -1185,7 +1185,7 @@ static void macvlan_dev_free(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); /* Get rid of the macvlan's reference to lowerdev */ - dev_put_track(vlan->lowerdev, &vlan->dev_tracker); + netdev_put(vlan->lowerdev, &vlan->dev_tracker); } void macvlan_common_setup(struct net_device *dev) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index ab8cd5551020..ddac61d79145 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -721,7 +721,7 @@ restart: __netpoll_cleanup(&nt->np); spin_lock_irqsave(&target_list_lock, flags); - dev_put_track(nt->np.dev, &nt->np.dev_tracker); + netdev_put(nt->np.dev, &nt->np.dev_tracker); nt->np.dev = NULL; nt->enabled = false; stopped = true; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cfc30ce4c6e1..40445a12c682 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -814,8 +814,8 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) */ if (rt6) { dst = &rt6->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } @@ -1061,8 +1061,8 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) */ if (rth) { dst = &rth->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f615a66c89e9..e2e5088888b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3981,8 +3981,8 @@ static inline void netdev_tracker_free(struct net_device *dev, #endif } -static inline void dev_hold_track(struct net_device *dev, - netdevice_tracker *tracker, gfp_t gfp) +static inline void netdev_hold(struct net_device *dev, + netdevice_tracker *tracker, gfp_t gfp) { if (dev) { __dev_hold(dev); @@ -3990,8 +3990,8 @@ static inline void dev_hold_track(struct net_device *dev, } } -static inline void dev_put_track(struct net_device *dev, - netdevice_tracker *tracker) +static inline void netdev_put(struct net_device *dev, + netdevice_tracker *tracker) { if (dev) { netdev_tracker_free(dev, tracker); @@ -4004,11 +4004,11 @@ static inline void dev_put_track(struct net_device *dev, * @dev: network device * * Hold reference to device to keep it from being freed. - * Try using dev_hold_track() instead. + * Try using netdev_hold() instead. */ static inline void dev_hold(struct net_device *dev) { - dev_hold_track(dev, NULL, GFP_ATOMIC); + netdev_hold(dev, NULL, GFP_ATOMIC); } /** @@ -4016,17 +4016,17 @@ static inline void dev_hold(struct net_device *dev) * @dev: network device * * Release reference to device to allow it to be freed. - * Try using dev_put_track() instead. + * Try using netdev_put() instead. */ static inline void dev_put(struct net_device *dev) { - dev_put_track(dev, NULL); + netdev_put(dev, NULL); } -static inline void dev_replace_track(struct net_device *odev, - struct net_device *ndev, - netdevice_tracker *tracker, - gfp_t gfp) +static inline void netdev_ref_replace(struct net_device *odev, + struct net_device *ndev, + netdevice_tracker *tracker, + gfp_t gfp) { if (odev) netdev_tracker_free(odev, tracker); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c39d910d4b45..9287712ad977 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1923,7 +1923,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; - dev_put_track(dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); } } #else diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 839f2020b015..e3dff2df6f54 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -615,7 +615,7 @@ static int vlan_dev_init(struct net_device *dev) return -ENOMEM; /* Get vlan's reference to real_dev */ - dev_hold_track(real_dev, &vlan->dev_tracker, GFP_KERNEL); + netdev_hold(real_dev, &vlan->dev_tracker, GFP_KERNEL); return 0; } @@ -852,7 +852,7 @@ static void vlan_dev_free(struct net_device *dev) vlan->vlan_pcpu_stats = NULL; /* Get rid of the vlan's reference to real_dev */ - dev_put_track(vlan->real_dev, &vlan->dev_tracker); + netdev_put(vlan->real_dev, &vlan->dev_tracker); } void vlan_setup(struct net_device *dev) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 95393bb2760b..1a5c0b071aa3 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -102,7 +102,8 @@ again: ax25_disconnect(s, ENETUNREACH); s->ax25_dev = NULL; if (sk->sk_socket) { - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, + &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } ax25_cb_del(s); @@ -1065,7 +1066,7 @@ static int ax25_release(struct socket *sock) del_timer_sync(&ax25->t3timer); del_timer_sync(&ax25->idletimer); } - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -1146,7 +1147,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); - dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + netdev_hold(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); } done: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 95a76d571c44..ab88b6ac5401 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -60,7 +60,7 @@ void ax25_dev_device_up(struct net_device *dev) refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; - dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); ax25_dev->forward = NULL; ax25_dev->device_up = true; @@ -136,7 +136,7 @@ unlock_put: spin_unlock_bh(&ax25_dev_lock); ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; - dev_put_track(dev, &ax25_dev->dev_tracker); + netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -205,7 +205,7 @@ void __exit ax25_dev_free(void) ax25_dev = ax25_dev_list; while (ax25_dev != NULL) { s = ax25_dev; - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev = ax25_dev->next; kfree(s); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 47fcbade7389..a84a7cfb9d6d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -274,7 +274,7 @@ static void destroy_nbp(struct net_bridge_port *p) p->br = NULL; p->dev = NULL; - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kobject_put(&p->kobj); } @@ -423,7 +423,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, return ERR_PTR(-ENOMEM); p->br = br; - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; @@ -434,7 +434,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, br_stp_port_timer_init(p); err = br_multicast_add_port(p); if (err) { - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); p = ERR_PTR(err); } @@ -615,7 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, err = dev_set_allmulti(dev, 1); if (err) { br_multicast_del_port(p); - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); /* kobject not yet init'd, manually free */ goto err1; } @@ -725,7 +725,7 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: br_multicast_del_port(p); - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kobject_put(&p->kobj); dev_set_allmulti(dev, -1); err1: diff --git a/net/core/dev.c b/net/core/dev.c index 08ce317fcec8..c9d96b85a825 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7463,7 +7463,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->ref_nr = 1; adj->private = private; adj->ignore = false; - dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL); + netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL); pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); @@ -7492,7 +7492,7 @@ remove_symlinks: if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree(adj); return ret; @@ -7534,7 +7534,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, list_del_rcu(&adj->list); pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - dev_put_track(adj_dev, &adj->dev_tracker); + netdev_put(adj_dev, &adj->dev_tracker); kfree_rcu(adj, rcu); } @@ -10062,7 +10062,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); - dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL); + netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL); list_netdevice(dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -10868,7 +10868,7 @@ void unregister_netdevice_many(struct list_head *head) synchronize_net(); list_for_each_entry(dev, head, unreg_list) { - dev_put_track(dev, &dev->dev_registered_tracker); + netdev_put(dev, &dev->dev_registered_tracker); net_set_todo(dev); } diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4f6be442ae7e..7674bb9f3076 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -384,10 +384,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -ENODEV; if (!netif_is_bridge_master(dev)) return -EOPNOTSUPP; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); - dev_put_track(dev, &dev_tracker); + netdev_put(dev, &dev_tracker); rtnl_lock(); return err; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 4ad1decce724..804d02fc245f 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -864,7 +864,8 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC); + netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker, + GFP_ATOMIC); return hw_metadata; @@ -880,7 +881,7 @@ free_hw_metadata: static void net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata) { - dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker); + netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); diff --git a/net/core/dst.c b/net/core/dst.c index d16c2c9bfebd..bc9c9be4e080 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,7 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - dev_hold_track(dev, &dst->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -117,7 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - dev_put_track(dst->dev, &dst->dev_tracker); + netdev_put(dst->dev, &dst->dev_tracker); lwtstate_put(dst->lwtstate); @@ -159,8 +159,8 @@ void dst_dev_put(struct dst_entry *dst) dst->input = dst_discard; dst->output = dst_discard_out; dst->dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, &dst->dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, + GFP_ATOMIC); } EXPORT_SYMBOL(dst_dev_put); diff --git a/net/core/failover.c b/net/core/failover.c index dcaa92a85ea2..864d2d83eff4 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -252,7 +252,7 @@ struct failover *failover_register(struct net_device *dev, return ERR_PTR(-ENOMEM); rcu_assign_pointer(failover->ops, ops); - dev_hold_track(dev, &failover->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL); dev->priv_flags |= IFF_FAILOVER; rcu_assign_pointer(failover->failover_dev, dev); @@ -285,7 +285,7 @@ void failover_unregister(struct failover *failover) failover_dev->name); failover_dev->priv_flags &= ~IFF_FAILOVER; - dev_put_track(failover_dev, &failover->dev_tracker); + netdev_put(failover_dev, &failover->dev_tracker); spin_lock(&failover_lock); list_del(&failover->list); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a244d3bade7d..aa6cb1f90966 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -110,7 +110,7 @@ static void linkwatch_add_event(struct net_device *dev) spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); - dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 54625287ee5b..d8ec70622ecb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -624,7 +624,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, memcpy(n->primary_key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC); /* Protocol specific setup. */ if (tbl->constructor && (error = tbl->constructor(n)) < 0) { @@ -770,10 +770,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &n->dev_tracker, GFP_KERNEL); if (tbl->pconstructor && tbl->pconstructor(n)) { - dev_put_track(dev, &n->dev_tracker); + netdev_put(dev, &n->dev_tracker); kfree(n); n = NULL; goto out; @@ -805,7 +805,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); return 0; } @@ -838,7 +838,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); - dev_put_track(n->dev, &n->dev_tracker); + netdev_put(n->dev, &n->dev_tracker); kfree(n); } return -ENOENT; @@ -879,7 +879,7 @@ void neigh_destroy(struct neighbour *neigh) if (dev->netdev_ops->ndo_neigh_destroy) dev->netdev_ops->ndo_neigh_destroy(dev, neigh); - dev_put_track(dev, &neigh->dev_tracker); + netdev_put(dev, &neigh->dev_tracker); neigh_parms_put(neigh->parms); neigh_dbg(2, "neigh %p is destroyed\n", neigh); @@ -1671,13 +1671,13 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); - dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; write_pnet(&p->net, net); p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { - dev_put_track(dev, &p->dev_tracker); + netdev_put(dev, &p->dev_tracker); kfree(p); return NULL; } @@ -1708,7 +1708,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - dev_put_track(parms->dev, &parms->dev_tracker); + netdev_put(parms->dev, &parms->dev_tracker); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e319e242dddf..d49fc974e630 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1016,7 +1016,7 @@ static void rx_queue_release(struct kobject *kobj) #endif memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *rx_queue_namespace(struct kobject *kobj) @@ -1056,7 +1056,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, @@ -1619,7 +1619,7 @@ static void netdev_queue_release(struct kobject *kobj) struct netdev_queue *queue = to_netdev_queue(kobj); memset(kobj, 0, sizeof(*kobj)); - dev_put_track(queue->dev, &queue->dev_tracker); + netdev_put(queue->dev, &queue->dev_tracker); } static const void *netdev_queue_namespace(struct kobject *kobj) @@ -1659,7 +1659,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ - dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL); + netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL); kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index db724463e7cd..5d27067b72d5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -853,7 +853,7 @@ void netpoll_cleanup(struct netpoll *np) if (!np->dev) goto out; __netpoll_cleanup(np); - dev_put_track(np->dev, &np->dev_tracker); + netdev_put(np->dev, &np->dev_tracker); np->dev = NULL; out: rtnl_unlock(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 84b62cd7bc57..88906ba6d9a7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2100,7 +2100,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn, /* Clean old setups */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } @@ -3807,7 +3807,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); out2: - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); out1: #ifdef CONFIG_XFRM free_SAs(pkt_dev); @@ -3901,7 +3901,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Dis-associate from the interface */ if (pkt_dev->odev) { - dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker); + netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker); pkt_dev->odev = NULL; } diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 326e14ee05db..d05ff6a17a1f 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2010,7 +2010,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) * removal of the device. */ busy = true; - dev_hold_track(dev, &dev_tracker, GFP_KERNEL); + netdev_hold(dev, &dev_tracker, GFP_KERNEL); rtnl_unlock(); if (rc == 0) { @@ -2034,7 +2034,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) } rtnl_lock(); - dev_put_track(dev, &dev_tracker); + netdev_put(dev, &dev_tracker); busy = false; (void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5fe8f4ae2ceb..e26079e11835 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -402,7 +402,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); @@ -414,7 +414,7 @@ err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); kfree(reply_data); kfree(req_info); return ret; @@ -550,7 +550,7 @@ static int ethnl_default_start(struct netlink_callback *cb) * same parser as for non-dump (doit) requests is used, it * would take reference to the device if it finds one */ - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); req_info->dev = NULL; } if (ret < 0) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 7919ddb2371c..c0d587611854 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -237,7 +237,7 @@ struct ethnl_req_info { static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) { - dev_put_track(req_info->dev, &req_info->dev_tracker); + netdev_put(req_info->dev, &req_info->dev_tracker); } /** diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b2366ad540e6..92b778e423df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -244,7 +244,7 @@ void in_dev_finish_destroy(struct in_device *idev) #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif - dev_put_track(dev, &idev->dev_tracker); + netdev_put(dev, &idev->dev_tracker); if (!idev->dead) pr_err("Freeing alive in_device %p\n", idev); else @@ -272,7 +272,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) dev_disable_lro(dev); /* Reference in_dev->dev */ - dev_hold_track(dev, &in_dev->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a57ba23571c9..a5439a8414d4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -211,7 +211,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) void fib_nh_common_release(struct fib_nh_common *nhc) { - dev_put_track(nhc->nhc_dev, &nhc->nhc_dev_tracker); + netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); @@ -1057,7 +1057,8 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL); + netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, + GFP_KERNEL); nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; @@ -1141,7 +1142,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; nh->fib_nh_dev = dev; - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_LINK; return 0; } @@ -1195,7 +1196,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, "No egress device for nexthop gateway"); goto out; } - dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); if (!netif_carrier_ok(dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; @@ -1229,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, } nh->fib_nh_dev = in_dev->dev; - dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); + netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); nh->fib_nh_scope = RT_SCOPE_HOST; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 13e6329784fb..8324e541d193 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -691,7 +691,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); - dev_put_track(dev, &v->dev_tracker); + netdev_put(dev, &v->dev_tracker); return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 356f535f3443..2d16bcc7d346 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1550,9 +1550,8 @@ void rt_flush_dev(struct net_device *dev) if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; - dev_replace_track(dev, blackhole_netdev, - &rt->dst.dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(dev, blackhole_netdev, + &rt->dst.dev_tracker, GFP_ATOMIC); list_move(&rt->rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); @@ -2851,7 +2850,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->output = dst_discard_out; new->dev = net->loopback_dev; - dev_hold_track(new->dev, &new->dev_tracker, GFP_ATOMIC); + netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6fde0b184791..3d0dfa6cf9f9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -75,7 +75,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.dst.dev = dev; - dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b1932502e9e..3497ad1362c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -398,13 +398,13 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (ndev->cnf.forwarding) dev_disable_lro(dev); /* We refer to the device */ - dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); if (snmp6_alloc_dev(ndev) < 0) { netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", __func__); neigh_parms_release(&nd_tbl, ndev->nd_parms); - dev_put_track(dev, &ndev->dev_tracker); + netdev_put(dev, &ndev->dev_tracker); kfree(ndev); return ERR_PTR(err); } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 881d1477d24a..507a8353a6bd 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -263,7 +263,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); #endif - dev_put_track(dev, &idev->dev_tracker); + netdev_put(dev, &idev->dev_tracker); if (!idev->dead) { pr_warn("Freeing alive inet6 device %p\n", idev); return; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4e37f7c29900..3e22cbe5966a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -398,7 +398,7 @@ static void ip6erspan_tunnel_uninit(struct net_device *dev) ip6erspan_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } static void ip6gre_tunnel_uninit(struct net_device *dev) @@ -411,7 +411,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) if (ign->fb_tunnel_dev == dev) WRITE_ONCE(ign->fb_tunnel_dev, NULL); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } @@ -1495,7 +1495,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } ip6gre_tnl_init_features(dev); - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; cleanup_dst_cache_init: @@ -1887,7 +1887,7 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip6erspan_tnl_link_config(tunnel, 1); - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; cleanup_dst_cache_init: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 19325b7600bb..689de5eb604e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -381,7 +381,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) else ip6_tnl_unlink(ip6n, t); dst_cache_reset(&t->dst_cache); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } /** @@ -1889,7 +1889,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; - dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); return 0; destroy_dst: diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 3a434d75925c..8fe59a79e800 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -293,7 +293,7 @@ static void vti6_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else vti6_tnl_unlink(ip6n, t); - dev_put_track(dev, &t->dev_tracker); + netdev_put(dev, &t->dev_tracker); } static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -936,7 +936,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); return 0; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4e74bc61a3db..d4aad41c9849 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -741,7 +741,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, if ((v->flags & MIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); - dev_put_track(dev, &v->dev_tracker); + netdev_put(dev, &v->dev_tracker); return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d25dc83bac62..0be01a4d48c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -182,9 +182,9 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) if (rt_dev == dev) { rt->dst.dev = blackhole_netdev; - dev_replace_track(rt_dev, blackhole_netdev, - &rt->dst.dev_tracker, - GFP_ATOMIC); + netdev_ref_replace(rt_dev, blackhole_netdev, + &rt->dst.dev_tracker, + GFP_ATOMIC); handled = true; } if (handled) @@ -607,7 +607,7 @@ static void rt6_probe_deferred(struct work_struct *w) addrconf_addr_solict_mult(&work->target, &mcaddr); ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); - dev_put_track(work->dev, &work->dev_tracker); + netdev_put(work->dev, &work->dev_tracker); kfree(work); } @@ -661,7 +661,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) } else { INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; - dev_hold_track(dev, &work->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC); work->dev = dev; schedule_work(&work->work); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c0b138c20992..4f1721865fda 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -521,7 +521,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_del_prl(tunnel, NULL); } dst_cache_reset(&tunnel->dst_cache); - dev_put_track(dev, &tunnel->dev_tracker); + netdev_put(dev, &tunnel->dev_tracker); } static int ipip6_err(struct sk_buff *skb, u32 info) @@ -1463,7 +1463,7 @@ static int ipip6_tunnel_init(struct net_device *dev) dev->tstats = NULL; return err; } - dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e64e427a51cf..4a4b0e49ec92 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,11 +73,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rt6_info *rt = (struct rt6_info *)xdst->route; xdst->u.dst.dev = dev; - dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) { - dev_put_track(dev, &xdst->u.dst.dev_tracker); + netdev_put(dev, &xdst->u.dst.dev_tracker); return -ENODEV; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7f555d2e5357..da7fe94bea2e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -224,7 +224,7 @@ static int llc_ui_release(struct socket *sock) } else { release_sock(sk); } - dev_put_track(llc->dev, &llc->dev_tracker); + netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); llc_sk_free(sk); out: diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b498dac4e1e0..2f61d5bdce1a 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -115,7 +115,7 @@ error_master_upper_dev_unlink: error_unlock: rtnl_unlock(); error_put: - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -137,7 +137,7 @@ static void vport_netdev_free(struct rcu_head *rcu) { struct vport *vport = container_of(rcu, struct vport, rcu); - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); ovs_vport_free(vport); } @@ -173,7 +173,7 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev); - dev_put_track(vport->dev, &vport->dev_tracker); + netdev_put(vport->dev, &vport->dev_tracker); vport->dev = NULL; rtnl_unlock(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ca6e92a22923..d08c4728523b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3134,7 +3134,7 @@ static int packet_release(struct socket *sock) packet_cached_dev_reset(po); if (po->prot_hook.dev) { - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); @@ -3235,15 +3235,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; - dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); if (unlikely(unlisted)) { po->prot_hook.dev = NULL; WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { - dev_hold_track(dev, &po->prot_hook.dev_tracker, - GFP_ATOMIC); + netdev_hold(dev, &po->prot_hook.dev_tracker, + GFP_ATOMIC); po->prot_hook.dev = dev; WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); @@ -4167,8 +4167,8 @@ static int packet_notifier(struct notifier_block *this, if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); - dev_put_track(po->prot_hook.dev, - &po->prot_hook.dev_tracker); + netdev_put(po->prot_hook.dev, + &po->prot_hook.dev_tracker); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index ebb92fb072ab..a1d70cf86843 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -79,7 +79,7 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -181,7 +181,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, mac_header_xmit = dev_is_mac_header_xmit(ndev); odev = rcu_replace_pointer(m->tcfm_dev, ndev, lockdep_is_held(&m->tcf_lock)); - dev_put_track(odev, &m->tcfm_dev_tracker); + netdev_put(odev, &m->tcfm_dev_tracker); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; } @@ -402,7 +402,7 @@ static int mirred_device_event(struct notifier_block *unused, list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. */ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3c0e8ea2dbb..bf87b50837a8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1292,7 +1292,7 @@ err_out5: if (ops->destroy) ops->destroy(sch); err_out3: - dev_put_track(dev, &sch->dev_tracker); + netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: module_put(ops->owner); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index dba0b3e24af5..cc6eabee2830 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -541,7 +541,7 @@ static void dev_watchdog(struct timer_list *t) spin_unlock(&dev->tx_global_lock); if (release) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); } void __netdev_watchdog_up(struct net_device *dev) @@ -551,7 +551,8 @@ void __netdev_watchdog_up(struct net_device *dev) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) - dev_hold_track(dev, &dev->watchdog_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->watchdog_dev_tracker, + GFP_ATOMIC); } } EXPORT_SYMBOL_GPL(__netdev_watchdog_up); @@ -565,7 +566,7 @@ static void dev_watchdog_down(struct net_device *dev) { netif_tx_lock_bh(dev); if (del_timer(&dev->watchdog_timer)) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); netif_tx_unlock_bh(dev); } @@ -975,7 +976,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - dev_hold_track(dev, &sch->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); refcount_set(&sch->refcnt, 1); return sch; @@ -1067,7 +1068,7 @@ static void qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); module_put(ops->owner); - dev_put_track(qdisc_dev(qdisc), &qdisc->dev_tracker); + netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7055ed10e316..4c3bf6db7038 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -120,7 +120,8 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + netdev_put(pnetelem->ndev, + &pnetelem->dev_tracker); pr_warn_ratelimited("smc: net device %s " "erased user defined " "pnetid %.16s\n", @@ -196,7 +197,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { - dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); + netdev_hold(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); pnetelem->ndev = ndev; rc = 0; pr_warn_ratelimited("smc: adding net device %s with " @@ -227,7 +228,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { - dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); + netdev_put(pnetelem->ndev, &pnetelem->dev_tracker); pnetelem->ndev = NULL; rc = 0; pr_warn_ratelimited("smc: removing net device %s with " diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 474f76383033..8cc42aea19c7 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -64,7 +64,7 @@ void switchdev_deferred_process(void) while ((dfitem = switchdev_deferred_dequeue())) { dfitem->func(dfitem->dev, dfitem->data); - dev_put_track(dfitem->dev, &dfitem->dev_tracker); + netdev_put(dfitem->dev, &dfitem->dev_tracker); kfree(dfitem); } } @@ -91,7 +91,7 @@ static int switchdev_deferred_enqueue(struct net_device *dev, dfitem->dev = dev; dfitem->func = func; memcpy(dfitem->data, data, data_len); - dev_hold_track(dev, &dfitem->dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dfitem->dev_tracker, GFP_ATOMIC); spin_lock_bh(&deferred_lock); list_add_tail(&dfitem->list, &deferred); spin_unlock_bh(&deferred_lock); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 932c87b98eca..35cac7733fd3 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -788,7 +788,7 @@ int tipc_attach_loopback(struct net *net) if (!dev) return -ENODEV; - dev_hold_track(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); + netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL); tn->loopback_pt.dev = dev; tn->loopback_pt.type = htons(ETH_P_TIPC); tn->loopback_pt.func = tipc_loopback_rcv_pkt; @@ -801,7 +801,7 @@ void tipc_detach_loopback(struct net *net) struct tipc_net *tn = tipc_net(net); dev_remove_pack(&tn->loopback_pt); - dev_put_track(net->loopback_dev, &tn->loopback_pt.dev_tracker); + netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker); } /* Caller should hold rtnl_lock to protect the bearer */ diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 35c7e89b2e7d..637ca8838436 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -275,7 +275,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = NULL; xso->dir = 0; xso->real_dev = NULL; - dev_put_track(dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); if (err != -EOPNOTSUPP) return err; -- cgit v1.2.3 From 09cca53c1656960c38c04bb12da30f61952ca660 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:32 -0700 Subject: vlan: adopt u64_stats_t As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Add READ_ONCE() when reading rx_errors & tx_dropped. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 18 +++++++++--------- include/linux/if_macvlan.h | 6 +++--- include/linux/if_vlan.h | 10 +++++----- net/8021q/vlan_core.c | 6 +++--- net/8021q/vlan_dev.c | 18 +++++++++--------- 5 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 5b46a6526fc6..1080d6ebff63 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -575,8 +575,8 @@ static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->tx_packets++; - pcpu_stats->tx_bytes += len; + u64_stats_inc(&pcpu_stats->tx_packets); + u64_stats_add(&pcpu_stats->tx_bytes, len); u64_stats_update_end(&pcpu_stats->syncp); } else { this_cpu_inc(vlan->pcpu_stats->tx_dropped); @@ -949,11 +949,11 @@ static void macvlan_dev_get_stats64(struct net_device *dev, p = per_cpu_ptr(vlan->pcpu_stats, i); do { start = u64_stats_fetch_begin_irq(&p->syncp); - rx_packets = p->rx_packets; - rx_bytes = p->rx_bytes; - rx_multicast = p->rx_multicast; - tx_packets = p->tx_packets; - tx_bytes = p->tx_bytes; + rx_packets = u64_stats_read(&p->rx_packets); + rx_bytes = u64_stats_read(&p->rx_bytes); + rx_multicast = u64_stats_read(&p->rx_multicast); + tx_packets = u64_stats_read(&p->tx_packets); + tx_bytes = u64_stats_read(&p->tx_bytes); } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rx_packets; @@ -964,8 +964,8 @@ static void macvlan_dev_get_stats64(struct net_device *dev, /* rx_errors & tx_dropped are u32, updated * without syncp protection. */ - rx_errors += p->rx_errors; - tx_dropped += p->tx_dropped; + rx_errors += READ_ONCE(p->rx_errors); + tx_dropped += READ_ONCE(p->tx_dropped); } stats->rx_errors = rx_errors; stats->rx_dropped = rx_errors; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index b42294739063..523025106a64 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -46,10 +46,10 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, pcpu_stats = get_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->rx_packets++; - pcpu_stats->rx_bytes += len; + u64_stats_inc(&pcpu_stats->rx_packets); + u64_stats_add(&pcpu_stats->rx_bytes, len); if (multicast) - pcpu_stats->rx_multicast++; + u64_stats_inc(&pcpu_stats->rx_multicast); u64_stats_update_end(&pcpu_stats->syncp); put_cpu_ptr(vlan->pcpu_stats); } else { diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 2be4dd7e90a9..e00c4ee81ff7 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -118,11 +118,11 @@ static inline void vlan_drop_rx_stag_filter_info(struct net_device *dev) * @tx_dropped: number of tx drops */ struct vlan_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_multicast; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_errors; u32 tx_dropped; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index acf8c791f320..5aa8144101dc 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -63,10 +63,10 @@ bool vlan_do_receive(struct sk_buff **skbp) rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; + u64_stats_inc(&rx_stats->rx_packets); + u64_stats_add(&rx_stats->rx_bytes, skb->len); if (skb->pkt_type == PACKET_MULTICAST) - rx_stats->rx_multicast++; + u64_stats_inc(&rx_stats->rx_multicast); u64_stats_update_end(&rx_stats->syncp); return true; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e3dff2df6f54..035812b0461c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -128,8 +128,8 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, stats = this_cpu_ptr(vlan->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += len; + u64_stats_inc(&stats->tx_packets); + u64_stats_add(&stats->tx_bytes, len); u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); @@ -713,11 +713,11 @@ static void vlan_dev_get_stats64(struct net_device *dev, p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); do { start = u64_stats_fetch_begin_irq(&p->syncp); - rxpackets = p->rx_packets; - rxbytes = p->rx_bytes; - rxmulticast = p->rx_multicast; - txpackets = p->tx_packets; - txbytes = p->tx_bytes; + rxpackets = u64_stats_read(&p->rx_packets); + rxbytes = u64_stats_read(&p->rx_bytes); + rxmulticast = u64_stats_read(&p->rx_multicast); + txpackets = u64_stats_read(&p->tx_packets); + txbytes = u64_stats_read(&p->tx_bytes); } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rxpackets; @@ -726,8 +726,8 @@ static void vlan_dev_get_stats64(struct net_device *dev, stats->tx_packets += txpackets; stats->tx_bytes += txbytes; /* rx_errors & tx_dropped are u32 */ - rx_errors += p->rx_errors; - tx_dropped += p->tx_dropped; + rx_errors += READ_ONCE(p->rx_errors); + tx_dropped += READ_ONCE(p->tx_dropped); } stats->rx_errors = rx_errors; stats->tx_dropped = tx_dropped; -- cgit v1.2.3 From 9962acefbcb92736c268aafe5f52200948f60f3e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:37 -0700 Subject: net: adopt u64_stats_t in struct pcpu_sw_netstats As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 8 ++++---- drivers/net/usb/usbnet.c | 8 ++++---- drivers/net/vxlan/vxlan_core.c | 8 ++++---- include/linux/netdevice.h | 16 ++++++++-------- include/net/ip_tunnels.h | 4 ++-- net/bridge/br_netlink.c | 8 ++++---- net/bridge/br_vlan.c | 36 ++++++++++++++++++++---------------- net/core/dev.c | 18 +++++++++--------- net/dsa/slave.c | 8 ++++---- 9 files changed, 59 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 815738c0e067..c881e1bf6f6e 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -523,8 +523,8 @@ static void count_tx(struct net_device *dev, int ret, int len) struct pcpu_sw_netstats *stats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&stats->syncp); - stats->tx_packets++; - stats->tx_bytes += len; + u64_stats_inc(&stats->tx_packets); + u64_stats_add(&stats->tx_bytes, len); u64_stats_update_end(&stats->syncp); } } @@ -825,8 +825,8 @@ static void count_rx(struct net_device *dev, int len) struct pcpu_sw_netstats *stats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += len; + u64_stats_inc(&stats->rx_packets); + u64_stats_add(&stats->rx_bytes, len); u64_stats_update_end(&stats->syncp); } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1cb6dab3e2d0..dc79811535c2 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -337,8 +337,8 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) skb->protocol = eth_type_trans (skb, dev->net); flags = u64_stats_update_begin_irqsave(&stats64->syncp); - stats64->rx_packets++; - stats64->rx_bytes += skb->len; + u64_stats_inc(&stats64->rx_packets); + u64_stats_add(&stats64->rx_bytes, skb->len); u64_stats_update_end_irqrestore(&stats64->syncp, flags); netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n", @@ -1258,8 +1258,8 @@ static void tx_complete (struct urb *urb) unsigned long flags; flags = u64_stats_update_begin_irqsave(&stats64->syncp); - stats64->tx_packets += entry->packets; - stats64->tx_bytes += entry->length; + u64_stats_add(&stats64->tx_packets, entry->packets); + u64_stats_add(&stats64->tx_bytes, entry->length); u64_stats_update_end_irqrestore(&stats64->syncp, flags); } else { dev->net->stats.tx_errors++; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 265d4a0245e7..8b0710b576c2 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2385,15 +2385,15 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni); u64_stats_update_begin(&tx_stats->syncp); - tx_stats->tx_packets++; - tx_stats->tx_bytes += len; + u64_stats_inc(&tx_stats->tx_packets); + u64_stats_add(&tx_stats->tx_bytes, len); u64_stats_update_end(&tx_stats->syncp); vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len); if (__netif_rx(skb) == NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); - rx_stats->rx_packets++; - rx_stats->rx_bytes += len; + u64_stats_inc(&rx_stats->rx_packets); + u64_stats_add(&rx_stats->rx_bytes, len); u64_stats_update_end(&rx_stats->syncp); vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX, len); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e2e5088888b1..89afa4f7747d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2636,10 +2636,10 @@ struct packet_offload { /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); @@ -2656,8 +2656,8 @@ static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int l struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->rx_bytes += len; - tstats->rx_packets++; + u64_stats_add(&tstats->rx_bytes, len); + u64_stats_inc(&tstats->rx_packets); u64_stats_update_end(&tstats->syncp); } @@ -2668,8 +2668,8 @@ static inline void dev_sw_netstats_tx_add(struct net_device *dev, struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += len; - tstats->tx_packets += packets; + u64_stats_add(&tstats->tx_bytes, len); + u64_stats_add(&tstats->tx_packets, packets); u64_stats_update_end(&tstats->syncp); } diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c24fa934221d..70cbc4a72669 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -456,8 +456,8 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); } else { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index bb01776d2d88..1ef14a099c6b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1770,10 +1770,10 @@ static int br_fill_linkxstats(struct sk_buff *skb, if (v->vid == pvid) vxi.flags |= BRIDGE_VLAN_INFO_PVID; br_vlan_get_stats(v, &stats); - vxi.rx_bytes = stats.rx_bytes; - vxi.rx_packets = stats.rx_packets; - vxi.tx_bytes = stats.tx_bytes; - vxi.tx_packets = stats.tx_packets; + vxi.rx_bytes = u64_stats_read(&stats.rx_bytes); + vxi.rx_packets = u64_stats_read(&stats.rx_packets); + vxi.tx_bytes = u64_stats_read(&stats.tx_bytes); + vxi.tx_packets = u64_stats_read(&stats.tx_packets); if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) goto nla_put_failure; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 0f5e75ccac79..6e53dc991409 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -505,8 +505,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { stats = this_cpu_ptr(v->stats); u64_stats_update_begin(&stats->syncp); - stats->tx_bytes += skb->len; - stats->tx_packets++; + u64_stats_add(&stats->tx_bytes, skb->len); + u64_stats_inc(&stats->tx_packets); u64_stats_update_end(&stats->syncp); } @@ -624,8 +624,8 @@ static bool __allowed_ingress(const struct net_bridge *br, if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { stats = this_cpu_ptr(v->stats); u64_stats_update_begin(&stats->syncp); - stats->rx_bytes += skb->len; - stats->rx_packets++; + u64_stats_add(&stats->rx_bytes, skb->len); + u64_stats_inc(&stats->rx_packets); u64_stats_update_end(&stats->syncp); } @@ -1379,16 +1379,16 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, cpu_stats = per_cpu_ptr(v->stats, i); do { start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); - rxpackets = cpu_stats->rx_packets; - rxbytes = cpu_stats->rx_bytes; - txbytes = cpu_stats->tx_bytes; - txpackets = cpu_stats->tx_packets; + rxpackets = u64_stats_read(&cpu_stats->rx_packets); + rxbytes = u64_stats_read(&cpu_stats->rx_bytes); + txbytes = u64_stats_read(&cpu_stats->tx_bytes); + txpackets = u64_stats_read(&cpu_stats->tx_packets); } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; - stats->tx_bytes += txbytes; - stats->tx_packets += txpackets; + u64_stats_add(&stats->rx_packets, rxpackets); + u64_stats_add(&stats->rx_bytes, rxbytes); + u64_stats_add(&stats->tx_bytes, txbytes); + u64_stats_add(&stats->tx_packets, txpackets); } } @@ -1779,14 +1779,18 @@ static bool br_vlan_stats_fill(struct sk_buff *skb, return false; br_vlan_get_stats(v, &stats); - if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, stats.rx_bytes, + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, + u64_stats_read(&stats.rx_bytes), BRIDGE_VLANDB_STATS_PAD) || nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS, - stats.rx_packets, BRIDGE_VLANDB_STATS_PAD) || - nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, stats.tx_bytes, + u64_stats_read(&stats.rx_packets), + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, + u64_stats_read(&stats.tx_bytes), BRIDGE_VLANDB_STATS_PAD) || nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS, - stats.tx_packets, BRIDGE_VLANDB_STATS_PAD)) + u64_stats_read(&stats.tx_packets), + BRIDGE_VLANDB_STATS_PAD)) goto out_err; nla_nest_end(skb, nest); diff --git a/net/core/dev.c b/net/core/dev.c index c9d96b85a825..511cf5d3aade 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10459,23 +10459,23 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, int cpu; for_each_possible_cpu(cpu) { + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; const struct pcpu_sw_netstats *stats; - struct pcpu_sw_netstats tmp; unsigned int start; stats = per_cpu_ptr(netstats, cpu); do { start = u64_stats_fetch_begin_irq(&stats->syncp); - tmp.rx_packets = stats->rx_packets; - tmp.rx_bytes = stats->rx_bytes; - tmp.tx_packets = stats->tx_packets; - tmp.tx_bytes = stats->tx_bytes; + rx_packets = u64_stats_read(&stats->rx_packets); + rx_bytes = u64_stats_read(&stats->rx_bytes); + tx_packets = u64_stats_read(&stats->tx_packets); + tx_bytes = u64_stats_read(&stats->tx_bytes); } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); - s->rx_packets += tmp.rx_packets; - s->rx_bytes += tmp.rx_bytes; - s->tx_packets += tmp.tx_packets; - s->tx_bytes += tmp.tx_bytes; + s->rx_packets += rx_packets; + s->rx_bytes += rx_bytes; + s->tx_packets += tx_packets; + s->tx_bytes += tx_bytes; } } EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 801a5d445833..2e1ac638d135 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -935,10 +935,10 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, s = per_cpu_ptr(dev->tstats, i); do { start = u64_stats_fetch_begin_irq(&s->syncp); - tx_packets = s->tx_packets; - tx_bytes = s->tx_bytes; - rx_packets = s->rx_packets; - rx_bytes = s->rx_bytes; + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); } while (u64_stats_fetch_retry_irq(&s->syncp, start)); data[0] += tx_packets; data[1] += tx_bytes; -- cgit v1.2.3 From 9ec321aba2eaca109139a2a67c65ede7f07bd8ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 08:46:40 -0700 Subject: team: adopt u64_stats_t As explained in commit 316580b69d0a ("u64_stats: provide u64_stats_t type") we should use u64_stats_t and related accessors to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- drivers/net/team/team.c | 26 +++++++++++++------------- include/linux/if_team.h | 10 +++++----- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b07dde6f0abf..aac133a1e27a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -749,10 +749,10 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) pcpu_stats = this_cpu_ptr(team->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->rx_packets++; - pcpu_stats->rx_bytes += skb->len; + u64_stats_inc(&pcpu_stats->rx_packets); + u64_stats_add(&pcpu_stats->rx_bytes, skb->len); if (skb->pkt_type == PACKET_MULTICAST) - pcpu_stats->rx_multicast++; + u64_stats_inc(&pcpu_stats->rx_multicast); u64_stats_update_end(&pcpu_stats->syncp); skb->dev = team->dev; @@ -1720,8 +1720,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) pcpu_stats = this_cpu_ptr(team->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->tx_packets++; - pcpu_stats->tx_bytes += len; + u64_stats_inc(&pcpu_stats->tx_packets); + u64_stats_add(&pcpu_stats->tx_bytes, len); u64_stats_update_end(&pcpu_stats->syncp); } else { this_cpu_inc(team->pcpu_stats->tx_dropped); @@ -1854,11 +1854,11 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) p = per_cpu_ptr(team->pcpu_stats, i); do { start = u64_stats_fetch_begin_irq(&p->syncp); - rx_packets = p->rx_packets; - rx_bytes = p->rx_bytes; - rx_multicast = p->rx_multicast; - tx_packets = p->tx_packets; - tx_bytes = p->tx_bytes; + rx_packets = u64_stats_read(&p->rx_packets); + rx_bytes = u64_stats_read(&p->rx_bytes); + rx_multicast = u64_stats_read(&p->rx_multicast); + tx_packets = u64_stats_read(&p->tx_packets); + tx_bytes = u64_stats_read(&p->tx_bytes); } while (u64_stats_fetch_retry_irq(&p->syncp, start)); stats->rx_packets += rx_packets; @@ -1870,9 +1870,9 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) * rx_dropped, tx_dropped & rx_nohandler are u32, * updated without syncp protection. */ - rx_dropped += p->rx_dropped; - tx_dropped += p->tx_dropped; - rx_nohandler += p->rx_nohandler; + rx_dropped += READ_ONCE(p->rx_dropped); + tx_dropped += READ_ONCE(p->tx_dropped); + rx_nohandler += READ_ONCE(p->rx_nohandler); } stats->rx_dropped = rx_dropped; stats->tx_dropped = tx_dropped; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index add607943c95..fc985e5c739d 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -12,11 +12,11 @@ #include struct team_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_multicast; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; -- cgit v1.2.3 From 21ab562c1f65e583b5d89081acaf096805522482 Mon Sep 17 00:00:00 2001 From: Po Hao Huang Date: Wed, 8 Jun 2022 19:32:22 +0800 Subject: ieee80211: add trigger frame definition Define trigger stype of control frame, and its checking function, struct and trigger type within common_info of trigger. Signed-off-by: Po Hao Huang Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220608113224.11193-2-pkshih@realtek.com --- include/linux/ieee80211.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 75d40acb60c1..5c65ae6b8154 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -76,6 +76,7 @@ #define IEEE80211_STYPE_ACTION 0x00D0 /* control */ +#define IEEE80211_STYPE_TRIGGER 0x0020 #define IEEE80211_STYPE_CTL_EXT 0x0060 #define IEEE80211_STYPE_BACK_REQ 0x0080 #define IEEE80211_STYPE_BACK 0x0090 @@ -295,6 +296,17 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_HT_CTL_LEN 4 +/* trigger type within common_info of trigger frame */ +#define IEEE80211_TRIGGER_TYPE_MASK 0xf +#define IEEE80211_TRIGGER_TYPE_BASIC 0x0 +#define IEEE80211_TRIGGER_TYPE_BFRP 0x1 +#define IEEE80211_TRIGGER_TYPE_MU_BAR 0x2 +#define IEEE80211_TRIGGER_TYPE_MU_RTS 0x3 +#define IEEE80211_TRIGGER_TYPE_BSRP 0x4 +#define IEEE80211_TRIGGER_TYPE_GCR_MU_BAR 0x5 +#define IEEE80211_TRIGGER_TYPE_BQRP 0x6 +#define IEEE80211_TRIGGER_TYPE_NFRP 0x7 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; @@ -324,6 +336,15 @@ struct ieee80211_qos_hdr { __le16 qos_ctrl; } __packed __aligned(2); +struct ieee80211_trigger { + __le16 frame_control; + __le16 duration; + u8 ra[ETH_ALEN]; + u8 ta[ETH_ALEN]; + __le64 common_info; + u8 variable[]; +} __packed __aligned(2); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder @@ -729,6 +750,16 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_trigger - check if frame is trigger frame + * @fc: frame control field in little-endian byteorder + */ +static inline bool ieee80211_is_trigger(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_TRIGGER); +} + /** * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From ccd8a9351f7b44bc1c0c8e4d39c0d6593b106fc4 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Fri, 10 Jun 2022 23:30:08 +0900 Subject: can: skb: move can_dropped_invalid_skb() and can_skb_headroom_valid() to skb.c The functions can_dropped_invalid_skb() and can_skb_headroom_valid() grew a lot over the years to a point which it does not make much sense to have them defined as static inline in header files. Move those two functions to the .c counterpart of skb.h. can_skb_headroom_valid()'s only caller being can_dropped_invalid_skb(), the declaration is removed from the header. Only can_dropped_invalid_skb() gets its symbol exported. While doing so, do a small cleanup: add brackets around the else block in can_dropped_invalid_skb(). Link: https://lore.kernel.org/all/20220610143009.323579-7-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Reported-by: kernel test robot Acked-by: Max Staudt Tested-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/skb.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/can/skb.h | 59 +---------------------------------------------- 2 files changed, 59 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index a4208f125b76..dc9da76c0470 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -259,3 +259,61 @@ struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf) return skb; } EXPORT_SYMBOL_GPL(alloc_can_err_skb); + +/* Check for outgoing skbs that have not been created by the CAN subsystem */ +static bool can_skb_headroom_valid(struct net_device *dev, struct sk_buff *skb) +{ + /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ + if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) + return false; + + /* af_packet does not apply CAN skb specific settings */ + if (skb->ip_summed == CHECKSUM_NONE) { + /* init headroom */ + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* perform proper loopback on capable devices */ + if (dev->flags & IFF_ECHO) + skb->pkt_type = PACKET_LOOPBACK; + else + skb->pkt_type = PACKET_HOST; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + } + + return true; +} + +/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ +bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) +{ + const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + + if (skb->protocol == htons(ETH_P_CAN)) { + if (unlikely(skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) + goto inval_skb; + } else if (skb->protocol == htons(ETH_P_CANFD)) { + if (unlikely(skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) + goto inval_skb; + } else { + goto inval_skb; + } + + if (!can_skb_headroom_valid(dev, skb)) + goto inval_skb; + + return false; + +inval_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; + return true; +} +EXPORT_SYMBOL_GPL(can_dropped_invalid_skb); diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index fdb22b00674a..182749e858b3 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -31,6 +31,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct canfd_frame **cfd); struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); +bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb); /* * The struct can_skb_priv is used to transport additional information along @@ -96,64 +97,6 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return nskb; } -/* Check for outgoing skbs that have not been created by the CAN subsystem */ -static inline bool can_skb_headroom_valid(struct net_device *dev, - struct sk_buff *skb) -{ - /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ - if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) - return false; - - /* af_packet does not apply CAN skb specific settings */ - if (skb->ip_summed == CHECKSUM_NONE) { - /* init headroom */ - can_skb_prv(skb)->ifindex = dev->ifindex; - can_skb_prv(skb)->skbcnt = 0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* perform proper loopback on capable devices */ - if (dev->flags & IFF_ECHO) - skb->pkt_type = PACKET_LOOPBACK; - else - skb->pkt_type = PACKET_HOST; - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - } - - return true; -} - -/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ -static inline bool can_dropped_invalid_skb(struct net_device *dev, - struct sk_buff *skb) -{ - const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - - if (skb->protocol == htons(ETH_P_CAN)) { - if (unlikely(skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) - goto inval_skb; - } else if (skb->protocol == htons(ETH_P_CANFD)) { - if (unlikely(skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) - goto inval_skb; - } else - goto inval_skb; - - if (!can_skb_headroom_valid(dev, skb)) - goto inval_skb; - - return false; - -inval_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return true; -} - static inline bool can_is_canfd_skb(const struct sk_buff *skb) { /* the CAN specific type of skb is identified by its data length */ -- cgit v1.2.3 From c04245328dd7e915e21ac6395ffd218616e22754 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Fri, 10 Jun 2022 17:10:17 +0800 Subject: net: make __sys_accept4_file() static __sys_accept4_file() isn't used outside of the file, make it static. As the same time, move file_flags and nofile parameters into __sys_accept4_file(). Signed-off-by: Yajun Deng Signed-off-by: David S. Miller --- include/linux/socket.h | 4 ---- net/socket.c | 15 ++++++--------- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 17311ad9f9af..414b8c7bb8f7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -428,10 +428,6 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); -extern int __sys_accept4_file(struct file *file, unsigned file_flags, - struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile); extern struct file *do_accept(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); diff --git a/net/socket.c b/net/socket.c index 2bc8773d9dc5..1b6f5e2ebef5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1878,10 +1878,8 @@ out_fd: return ERR_PTR(err); } -int __sys_accept4_file(struct file *file, unsigned file_flags, - struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile) +static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct file *newfile; int newfd; @@ -1892,11 +1890,11 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - newfd = __get_unused_fd_flags(flags, nofile); + newfd = get_unused_fd_flags(flags); if (unlikely(newfd < 0)) return newfd; - newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen, + newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen, flags); if (IS_ERR(newfile)) { put_unused_fd(newfd); @@ -1926,9 +1924,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, f = fdget(fd); if (f.file) { - ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, - upeer_addrlen, flags, - rlimit(RLIMIT_NOFILE)); + ret = __sys_accept4_file(f.file, upeer_sockaddr, + upeer_addrlen, flags); fdput(f); } -- cgit v1.2.3 From 795e10b450a88b2943a241a5eaa6e86ae4f47694 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 7 Jun 2022 15:47:43 +0300 Subject: net/mlx5: Introduce header-modify-pattern ICM properties Added new fields for device memory capabilities, in order to support creation of ICM memory for modify header patterns. Signed-off-by: Erez Shitrit Signed-off-by: Yevgeny Kliteynik Signed-off-by: Leon Romanovsky Acked-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fd7d083a34d3..789d64400744 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1086,11 +1086,14 @@ struct mlx5_ifc_device_mem_cap_bits { u8 log_sw_icm_alloc_granularity[0x6]; u8 log_steering_sw_icm_size[0x8]; - u8 reserved_at_120[0x20]; + u8 reserved_at_120[0x18]; + u8 log_header_modify_pattern_sw_icm_size[0x8]; u8 header_modify_sw_icm_start_address[0x40]; - u8 reserved_at_180[0x80]; + u8 reserved_at_180[0x40]; + + u8 header_modify_pattern_sw_icm_start_address[0x40]; u8 memic_operations[0x20]; -- cgit v1.2.3 From 667658364b2056f344a2769280b939a5e45610be Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 7 Jun 2022 15:47:44 +0300 Subject: net/mlx5: Manage ICM of type modify-header pattern Added support for managing new type of ICM for devices that support sw_owner_v2. Signed-off-by: Erez Shitrit Signed-off-by: Yevgeny Kliteynik Signed-off-by: Leon Romanovsky Acked-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c | 42 ++++++++++++++++++++++++ include/linux/mlx5/driver.h | 1 + 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index 3d5e57ff558c..7e02cbe8c3b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -12,13 +12,16 @@ struct mlx5_dm { spinlock_t lock; unsigned long *steering_sw_icm_alloc_blocks; unsigned long *header_modify_sw_icm_alloc_blocks; + unsigned long *header_modify_pattern_sw_icm_alloc_blocks; }; struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) { + u64 header_modify_pattern_icm_blocks = 0; u64 header_modify_icm_blocks = 0; u64 steering_icm_blocks = 0; struct mlx5_dm *dm; + bool support_v2; if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) return NULL; @@ -53,8 +56,27 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) goto err_modify_hdr; } + support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) && + MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) && + MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address); + + if (support_v2) { + header_modify_pattern_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_pattern_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(header_modify_pattern_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dm->header_modify_pattern_sw_icm_alloc_blocks) + goto err_pattern; + } + return dm; +err_pattern: + kfree(dm->header_modify_sw_icm_alloc_blocks); + err_modify_hdr: kfree(dm->steering_sw_icm_alloc_blocks); @@ -86,6 +108,14 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev) kfree(dm->header_modify_sw_icm_alloc_blocks); } + if (dm->header_modify_pattern_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + kfree(dm->header_modify_pattern_sw_icm_alloc_blocks); + } + kfree(dm); } @@ -130,6 +160,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, log_header_modify_sw_icm_size); block_map = dm->header_modify_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_pattern_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; default: return -EINVAL; } @@ -203,6 +240,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); block_map = dm->header_modify_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_pattern_sw_icm_start_address); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; default: return -EINVAL; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5040cd774c5a..76d7661e3e63 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -676,6 +676,7 @@ struct mlx5e_resources { enum mlx5_sw_icm_type { MLX5_SW_ICM_TYPE_STEERING, MLX5_SW_ICM_TYPE_HEADER_MODIFY, + MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN, }; #define MLX5_MAX_RESERVED_GIDS 8 -- cgit v1.2.3 From f5d23ee137e51b4e5cd5d263b144d5e6719f6e52 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 8 Jun 2022 13:04:47 -0700 Subject: net/mlx5: Add IFC bits and enums for flow meter Add/extend structure layouts and defines for flow meter. Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 114 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 111 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 604b85dd770a..15ac02eeed4f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -455,6 +455,7 @@ enum { MLX5_OPCODE_UMR = 0x25, + MLX5_OPCODE_ACCESS_ASO = 0x2d, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 789d64400744..91872afb2bfe 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -442,7 +442,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 max_modify_header_actions[0x8]; u8 max_ft_level[0x8]; - u8 reserved_at_40[0x20]; + u8 reserved_at_40[0x6]; + u8 execute_aso[0x1]; + u8 reserved_at_47[0x19]; u8 reserved_at_60[0x2]; u8 reformat_insert[0x1]; @@ -940,7 +942,17 @@ struct mlx5_ifc_qos_cap_bits { u8 max_tsar_bw_share[0x20]; - u8 reserved_at_100[0x700]; + u8 reserved_at_100[0x20]; + + u8 reserved_at_120[0x3]; + u8 log_meter_aso_granularity[0x5]; + u8 reserved_at_128[0x3]; + u8 log_meter_aso_max_alloc[0x5]; + u8 reserved_at_130[0x3]; + u8 log_max_num_meter_aso[0x5]; + u8 reserved_at_138[0x8]; + + u8 reserved_at_140[0x6c0]; }; struct mlx5_ifc_debug_cap_bits { @@ -3280,6 +3292,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800, MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT = 0x1000, MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT = 0x2000, + MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO = 0x4000, }; enum { @@ -3295,6 +3308,38 @@ struct mlx5_ifc_vlan_bits { u8 vid[0xc]; }; +enum { + MLX5_FLOW_METER_COLOR_RED = 0x0, + MLX5_FLOW_METER_COLOR_YELLOW = 0x1, + MLX5_FLOW_METER_COLOR_GREEN = 0x2, + MLX5_FLOW_METER_COLOR_UNDEFINED = 0x3, +}; + +enum { + MLX5_EXE_ASO_FLOW_METER = 0x2, +}; + +struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits { + u8 return_reg_id[0x4]; + u8 aso_type[0x4]; + u8 reserved_at_8[0x14]; + u8 action[0x1]; + u8 init_color[0x2]; + u8 meter_id[0x1]; +}; + +union mlx5_ifc_exe_aso_ctrl { + struct mlx5_ifc_exe_aso_ctrl_flow_meter_bits exe_aso_ctrl_flow_meter; +}; + +struct mlx5_ifc_execute_aso_bits { + u8 valid[0x1]; + u8 reserved_at_1[0x7]; + u8 aso_object_id[0x18]; + + union mlx5_ifc_exe_aso_ctrl exe_aso_ctrl; +}; + struct mlx5_ifc_flow_context_bits { struct mlx5_ifc_vlan_bits push_vlan; @@ -3326,7 +3371,9 @@ struct mlx5_ifc_flow_context_bits { struct mlx5_ifc_fte_match_param_bits match_value; - u8 reserved_at_1200[0x600]; + struct mlx5_ifc_execute_aso_bits execute_aso[4]; + + u8 reserved_at_1300[0x500]; union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[]; }; @@ -5973,7 +6020,9 @@ struct mlx5_ifc_general_obj_in_cmd_hdr_bits { u8 obj_id[0x20]; - u8 reserved_at_60[0x20]; + u8 reserved_at_60[0x3]; + u8 log_obj_range[0x5]; + u8 reserved_at_68[0x18]; }; struct mlx5_ifc_general_obj_out_cmd_hdr_bits { @@ -11373,12 +11422,14 @@ enum { MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc), MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13), MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24), }; enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, }; enum { @@ -11451,6 +11502,61 @@ struct mlx5_ifc_create_encryption_key_in_bits { struct mlx5_ifc_encryption_key_obj_bits encryption_key_object; }; +enum { + MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH = 0x0, + MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2 = 0x1, + MLX5_FLOW_METER_MODE_BYTES_CALC_WITH_L2_IPG = 0x2, + MLX5_FLOW_METER_MODE_NUM_PACKETS = 0x3, +}; + +struct mlx5_ifc_flow_meter_parameters_bits { + u8 valid[0x1]; + u8 bucket_overflow[0x1]; + u8 start_color[0x2]; + u8 both_buckets_on_green[0x1]; + u8 reserved_at_5[0x1]; + u8 meter_mode[0x2]; + u8 reserved_at_8[0x18]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x3]; + u8 cbs_exponent[0x5]; + u8 cbs_mantissa[0x8]; + u8 reserved_at_50[0x3]; + u8 cir_exponent[0x5]; + u8 cir_mantissa[0x8]; + + u8 reserved_at_60[0x20]; + + u8 reserved_at_80[0x3]; + u8 ebs_exponent[0x5]; + u8 ebs_mantissa[0x8]; + u8 reserved_at_90[0x3]; + u8 eir_exponent[0x5]; + u8 eir_mantissa[0x8]; + + u8 reserved_at_a0[0x60]; +}; + +struct mlx5_ifc_flow_meter_aso_obj_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x40]; + + u8 reserved_at_80[0x8]; + u8 meter_aso_access_pd[0x18]; + + u8 reserved_at_a0[0x160]; + + struct mlx5_ifc_flow_meter_parameters_bits flow_meter_parameters[2]; +}; + +struct mlx5_ifc_create_flow_meter_aso_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_flow_meter_aso_obj_bits flow_meter_aso_obj; +}; + struct mlx5_ifc_sampler_obj_bits { u8 modify_field_select[0x40]; -- cgit v1.2.3 From 3e94e61bd44d90070dcda53b647fdc826097ef26 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 8 Jun 2022 13:04:48 -0700 Subject: net/mlx5: Add HW definitions of vport debug counters total_q_under_processor_handle - number of queues in error state due to an async error or errored command. send_queue_priority_update_flow - number of QP/SQ priority/SL update events. cq_overrun - number of times CQ entered an error state due to an overflow. async_eq_overrun -number of time an EQ mapped to async events was overrun. comp_eq_overrun - number of time an EQ mapped to completion events was overrun. quota_exceeded_command - number of commands issued and failed due to quota exceeded. invalid_command - number of commands issued and failed dues to any reason other than quota exceeded. Signed-off-by: Saeed Mahameed Signed-off-by: Michael Guralnik Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 91872afb2bfe..f678ad88a7d5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1441,7 +1441,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_120[0xa]; u8 log_max_ra_req_dc[0x6]; - u8 reserved_at_130[0xa]; + u8 reserved_at_130[0x9]; + u8 vnic_env_cq_overrun[0x1]; u8 log_max_ra_res_dc[0x6]; u8 reserved_at_140[0x5]; @@ -1636,7 +1637,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 nic_receive_steering_discard[0x1]; u8 receive_discard_vport_down[0x1]; u8 transmit_discard_vport_down[0x1]; - u8 reserved_at_343[0x5]; + u8 eq_overrun_count[0x1]; + u8 reserved_at_344[0x1]; + u8 invalid_command_count[0x1]; + u8 quota_exceeded_count[0x1]; + u8 reserved_at_347[0x1]; u8 log_max_flow_counter_bulk[0x8]; u8 max_flow_counter_15_0[0x10]; @@ -3441,11 +3446,21 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 transmit_discard_vport_down[0x40]; - u8 reserved_at_140[0xa0]; + u8 async_eq_overrun[0x20]; + + u8 comp_eq_overrun[0x20]; + + u8 reserved_at_180[0x20]; + + u8 invalid_command[0x20]; + + u8 quota_exceeded_command[0x20]; u8 internal_rq_out_of_buffer[0x20]; - u8 reserved_at_200[0xe00]; + u8 cq_overrun[0x20]; + + u8 reserved_at_220[0xde0]; }; struct mlx5_ifc_traffic_counter_bits { -- cgit v1.2.3 From 91707779a481aab9c7f1d7a5ea3033ce87dc4fd6 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 8 Jun 2022 13:04:49 -0700 Subject: net/mlx5: Add support EXECUTE_ASO action for flow entry Attach flow meter to FTE with object id and index. Use metadata register C5 to store the packet color meter result. Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 33 ++++++++++++++++++++++++ include/linux/mlx5/fs.h | 14 ++++++++++ 2 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 2ccf7bef9b05..735dc805dad7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -479,6 +479,30 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, return 0; } + +static void +mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context) +{ + void *exe_aso_ctrl; + void *execute_aso; + + execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context, + execute_aso[0]); + MLX5_SET(execute_aso, execute_aso, valid, 1); + MLX5_SET(execute_aso, execute_aso, aso_object_id, + fte->action.exe_aso.object_id); + + exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id, + fte->action.exe_aso.return_reg_id); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type, + fte->action.exe_aso.type); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color, + fte->action.exe_aso.flow_meter.init_color); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id, + fte->action.exe_aso.flow_meter.meter_idx); +} + static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int opmod, int modify_mask, struct mlx5_flow_table *ft, @@ -663,6 +687,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, list_size); } + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) { + mlx5_cmd_set_fte_flow_meter(fte, in_flow_context); + } else { + err = -EOPNOTSUPP; + goto err_out; + } + } + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); err_out: kvfree(in); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8135713b0d2d..ece3e35622d7 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -212,6 +212,19 @@ struct mlx5_flow_group * mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_exe_aso { + u32 object_id; + u8 type; + u8 return_reg_id; + union { + u32 ctrl_data; + struct { + u8 meter_idx; + u8 init_color; + } flow_meter; + }; +}; + struct mlx5_fs_vlan { u16 ethtype; u16 vid; @@ -237,6 +250,7 @@ struct mlx5_flow_act { struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; struct mlx5_flow_group *fg; + struct mlx5_exe_aso exe_aso; }; #define MLX5_DECLARE_FLOW_ACT(name) \ -- cgit v1.2.3 From d107ba1f7c067b08eb4b8ca7c51187fb7c4d97f2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 8 Jun 2022 13:04:51 -0700 Subject: net/mlx5: Remove not used MLX5_CAP_BITS_RW_MASK Remove not used MLX5_CAP_BITS_RW_MASK. While at it, remove CAP_MASK, MLX5_CAP_OFF_CMDIF_CSUM and MLX5_DEV_CAP_FLAG_*, since MLX5_CAP_BITS_RW_MASK was their only user. Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 ------- include/linux/mlx5/device.h | 19 ------------------- 2 files changed, 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c9b4e50a593e..2078d9f03a5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -314,13 +314,6 @@ struct mlx5_reg_host_endianness { u8 rsvd[15]; }; -#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos)) - -enum { - MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) | - MLX5_DEV_CAP_FLAG_DCT, -}; - static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) { switch (size) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 15ac02eeed4f..95a4fa0fd40a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -386,21 +386,6 @@ enum { MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, }; -enum { - MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, - MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, - MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, - MLX5_DEV_CAP_FLAG_APM = 1LL << 17, - MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, - MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, - MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, - MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, - MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, - MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, - MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, - MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, -}; - enum { MLX5_ROCE_VERSION_1 = 0, MLX5_ROCE_VERSION_2 = 2, @@ -496,10 +481,6 @@ enum { MLX5_MAX_PAGE_SHIFT = 31 }; -enum { - MLX5_CAP_OFF_CMDIF_CSUM = 46, -}; - enum { /* * Max wqe size for rdma read is 512 bytes, so this -- cgit v1.2.3 From cdcdce948d64139aea1c6dfea4b04f5c8ad2784e Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Wed, 8 Jun 2022 13:04:52 -0700 Subject: net/mlx5: Add bits and fields to support enhanced CQE compression Expose ifc bits and add needed structure fields and methods to support enhanced CQE compression feature. The enhanced CQE compression feature improves cpu utiliziation with better packet latency from nic to host. Signed-off-by: Ofer Levi Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 16 +++++++++++++++- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 95a4fa0fd40a..b5f58fd37a0f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -822,7 +822,10 @@ struct mlx5_cqe64 { __be32 timestamp_l; __be32 sop_drop_qpn; __be16 wqe_counter; - u8 signature; + union { + u8 signature; + u8 validity_iteration_count; + }; u8 op_own; }; @@ -854,6 +857,11 @@ enum { MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3, }; +enum { + MLX5_CQE_COMPRESS_LAYOUT_BASIC = 0, + MLX5_CQE_COMPRESS_LAYOUT_ENHANCED = 1, +}; + #define MLX5_MINI_CQE_ARRAY_SIZE 8 static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe) @@ -866,6 +874,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) return cqe->op_own >> 4; } +static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe) +{ + /* num_of_mini_cqes is zero based */ + return get_cqe_opcode(cqe) + 1; +} + static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f678ad88a7d5..8e87eb47f9dc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1739,7 +1739,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_dci_errored_streams[0x5]; u8 reserved_at_598[0x8]; - u8 reserved_at_5a0[0x13]; + u8 reserved_at_5a0[0x10]; + u8 enhanced_cqe_compression[0x1]; + u8 reserved_at_5b1[0x2]; u8 log_max_dek[0x5]; u8 reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; @@ -4139,7 +4141,8 @@ struct mlx5_ifc_cqc_bits { u8 cqe_comp_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; - u8 reserved_at_18[0x8]; + u8 reserved_at_18[0x6]; + u8 cqe_compression_layout[0x2]; u8 reserved_at_20[0x20]; -- cgit v1.2.3 From 6dbdc9f35360d4ef4704462c265f63b32fcb5354 Mon Sep 17 00:00:00 2001 From: Hongyi Lu Date: Mon, 13 Jun 2022 21:16:33 +0000 Subject: bpf: Fix spelling in bpf_verifier.h Minor spelling fix spotted in bpf_verifier.h. Spelling is no big deal, but it is still an improvement when reading through the code. Signed-off-by: Hongyi Lu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220613211633.58647-1-jwnhy0@gmail.com --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e8439f6cbe57..3930c963fa67 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -299,7 +299,7 @@ struct bpf_verifier_state { * If is_state_visited() sees a state with branches > 0 it means * there is a loop. If such state is exactly equal to the current state * it's an infinite loop. Note states_equal() checks for states - * equvalency, so two states being 'states_equal' does not mean + * equivalency, so two states being 'states_equal' does not mean * infinite loop. The exact comparison is provided by * states_maybe_looping() function. It's a stronger pre-check and * much faster than states_equal(). -- cgit v1.2.3 From d687f621c518d791b5fffde8add3112d869b0b1b Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Tue, 14 Jun 2022 23:10:42 +0000 Subject: bpf: move bpf_prog to bpf.h In order to add a version of bpf_prog_run_array which accesses the bpf_prog->aux member, bpf_prog needs to be more than a forward declaration inside bpf.h. Given that filter.h already includes bpf.h, this merely reorders the type declarations for filter.h users. bpf.h users now have access to bpf_prog internals. Signed-off-by: Delyan Kratunov Link: https://lore.kernel.org/r/3ed7824e3948f22d84583649ccac0ff0d38b6b58.1655248076.git.delyank@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/filter.h | 34 ---------------------------------- 2 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8e6092d0ea95..69106ae46464 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -5,6 +5,7 @@ #define _LINUX_BPF_H 1 #include +#include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -1084,6 +1086,40 @@ struct bpf_prog_aux { }; }; +struct bpf_prog { + u16 pages; /* Number of allocated pages */ + u16 jited:1, /* Is our filter JIT'ed? */ + jit_requested:1,/* archs need to JIT the prog */ + gpl_compatible:1, /* Is filter GPL compatible? */ + cb_access:1, /* Is control block accessed? */ + dst_needed:1, /* Do we need dst entry? */ + blinding_requested:1, /* needs constant blinding */ + blinded:1, /* Was blinded */ + is_func:1, /* program is a bpf function */ + kprobe_override:1, /* Do we override a kprobe? */ + has_callchain_buf:1, /* callchain buffer allocated? */ + enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ + call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ + call_get_func_ip:1, /* Do we call get_func_ip() */ + tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + enum bpf_prog_type type; /* Type of BPF program */ + enum bpf_attach_type expected_attach_type; /* For some prog types */ + u32 len; /* Number of filter blocks */ + u32 jited_len; /* Size of jited insns in bytes */ + u8 tag[BPF_TAG_SIZE]; + struct bpf_prog_stats __percpu *stats; + int __percpu *active; + unsigned int (*bpf_func)(const void *ctx, + const struct bpf_insn *insn); + struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ + /* Instructions for interpreter */ + union { + DECLARE_FLEX_ARRAY(struct sock_filter, insns); + DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); + }; +}; + struct bpf_array_aux { /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; diff --git a/include/linux/filter.h b/include/linux/filter.h index ed0c0ff42ad5..d0cbb31b1b4d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -559,40 +559,6 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); -struct bpf_prog { - u16 pages; /* Number of allocated pages */ - u16 jited:1, /* Is our filter JIT'ed? */ - jit_requested:1,/* archs need to JIT the prog */ - gpl_compatible:1, /* Is filter GPL compatible? */ - cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - blinding_requested:1, /* needs constant blinding */ - blinded:1, /* Was blinded */ - is_func:1, /* program is a bpf function */ - kprobe_override:1, /* Do we override a kprobe? */ - has_callchain_buf:1, /* callchain buffer allocated? */ - enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ - call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ - call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ - enum bpf_prog_type type; /* Type of BPF program */ - enum bpf_attach_type expected_attach_type; /* For some prog types */ - u32 len; /* Number of filter blocks */ - u32 jited_len; /* Size of jited insns in bytes */ - u8 tag[BPF_TAG_SIZE]; - struct bpf_prog_stats __percpu *stats; - int __percpu *active; - unsigned int (*bpf_func)(const void *ctx, - const struct bpf_insn *insn); - struct bpf_prog_aux *aux; /* Auxiliary fields */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ - /* Instructions for interpreter */ - union { - DECLARE_FLEX_ARRAY(struct sock_filter, insns); - DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi); - }; -}; - struct sk_filter { refcount_t refcnt; struct rcu_head rcu; -- cgit v1.2.3 From 8c7dcb84e3b744b2b70baa7a44a9b1881c33a9c9 Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Tue, 14 Jun 2022 23:10:46 +0000 Subject: bpf: implement sleepable uprobes by chaining gps uprobes work by raising a trap, setting a task flag from within the interrupt handler, and processing the actual work for the uprobe on the way back to userspace. As a result, uprobe handlers already execute in a might_fault/_sleep context. The primary obstacle to sleepable bpf uprobe programs is therefore on the bpf side. Namely, the bpf_prog_array attached to the uprobe is protected by normal rcu. In order for uprobe bpf programs to become sleepable, it has to be protected by the tasks_trace rcu flavor instead (and kfree() called after a corresponding grace period). Therefore, the free path for bpf_prog_array now chains a tasks_trace and normal grace periods one after the other. Users who iterate under tasks_trace read section would be safe, as would users who iterate under normal read sections (from non-sleepable locations). The downside is that the tasks_trace latency affects all perf_event-attached bpf programs (and not just uprobe ones). This is deemed safe given the possible attach rates for kprobe/uprobe/tp programs. Separately, non-sleepable programs need access to dynamically sized rcu-protected maps, so bpf_run_prog_array_sleepables now conditionally takes an rcu read section, in addition to the overarching tasks_trace section. Signed-off-by: Delyan Kratunov Link: https://lore.kernel.org/r/ce844d62a2fd0443b08c5ab02e95bc7149f9aeb1.1655248076.git.delyank@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 52 +++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/core.c | 15 +++++++++++++ kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_uprobe.c | 5 ++--- 4 files changed, 71 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 69106ae46464..f3e88afdaffe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -26,6 +26,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -1372,6 +1373,8 @@ extern struct bpf_empty_prog_array bpf_empty_prog_array; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array *progs); +/* Use when traversal over the bpf_prog_array uses tasks_trace rcu */ +void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs); int bpf_prog_array_length(struct bpf_prog_array *progs); bool bpf_prog_array_is_empty(struct bpf_prog_array *array); int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, @@ -1463,6 +1466,55 @@ bpf_prog_run_array(const struct bpf_prog_array *array, return ret; } +/* Notes on RCU design for bpf_prog_arrays containing sleepable programs: + * + * We use the tasks_trace rcu flavor read section to protect the bpf_prog_array + * overall. As a result, we must use the bpf_prog_array_free_sleepable + * in order to use the tasks_trace rcu grace period. + * + * When a non-sleepable program is inside the array, we take the rcu read + * section and disable preemption for that program alone, so it can access + * rcu-protected dynamically sized maps. + */ +static __always_inline u32 +bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) +{ + const struct bpf_prog_array_item *item; + const struct bpf_prog *prog; + const struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_trace_run_ctx run_ctx; + u32 ret = 1; + + might_fault(); + + rcu_read_lock_trace(); + migrate_disable(); + + array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); + if (unlikely(!array)) + goto out; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + if (!prog->aux->sleepable) + rcu_read_lock(); + + run_ctx.bpf_cookie = item->bpf_cookie; + ret &= run_prog(prog, ctx); + item++; + + if (!prog->aux->sleepable) + rcu_read_unlock(); + } + bpf_reset_run_ctx(old_run_ctx); +out: + migrate_enable(); + rcu_read_unlock_trace(); + return ret; +} + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e78cc5eea4a5..b5ffebcce6cc 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2279,6 +2279,21 @@ void bpf_prog_array_free(struct bpf_prog_array *progs) kfree_rcu(progs, rcu); } +static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu) +{ + struct bpf_prog_array *progs; + + progs = container_of(rcu, struct bpf_prog_array, rcu); + kfree_rcu(progs, rcu); +} + +void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs) +{ + if (!progs || progs == &bpf_empty_prog_array.hdr) + return; + call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb); +} + int bpf_prog_array_length(struct bpf_prog_array *array) { struct bpf_prog_array_item *item; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 10b157a6d73e..d1c22594dbf9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1936,7 +1936,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event, event->prog = prog; event->bpf_cookie = bpf_cookie; rcu_assign_pointer(event->tp_event->prog_array, new_array); - bpf_prog_array_free(old_array); + bpf_prog_array_free_sleepable(old_array); unlock: mutex_unlock(&bpf_event_mutex); @@ -1962,7 +1962,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event) bpf_prog_array_delete_safe(old_array, event->prog); } else { rcu_assign_pointer(event->tp_event->prog_array, new_array); - bpf_prog_array_free(old_array); + bpf_prog_array_free_sleepable(old_array); } bpf_prog_put(event->prog); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9711589273cd..0282c119b1b2 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "trace_dynevent.h" #include "trace_probe.h" @@ -1346,9 +1347,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (bpf_prog_array_valid(call)) { u32 ret; - preempt_disable(); - ret = trace_call_bpf(call, regs); - preempt_enable(); + ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run); if (!ret) return; } -- cgit v1.2.3 From a793679827a87b01f9d973ea30b923cd5a3ff2c5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 14 Jun 2022 10:46:11 +0200 Subject: linux/phy.h: add phydev_err_probe() wrapper for dev_err_probe() The dev_err_probe() function is quite useful to avoid boilerplate related to -EPROBE_DEFER handling. Add a phydev_err_probe() helper to simplify making use of that from phy drivers which otherwise use the phydev_* helpers. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 508f1149665b..bed9a347481b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1539,6 +1539,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value) #define phydev_err(_phydev, format, args...) \ dev_err(&_phydev->mdio.dev, format, ##args) +#define phydev_err_probe(_phydev, err, format, args...) \ + dev_err_probe(&_phydev->mdio.dev, err, format, ##args) + #define phydev_info(_phydev, format, args...) \ dev_info(&_phydev->mdio.dev, format, ##args) -- cgit v1.2.3 From 508362ac66b0478affb4e52cb8da98478312d72d Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:43 +0300 Subject: bpf: Allow helpers to accept pointers with a fixed size Before this commit, the BPF verifier required ARG_PTR_TO_MEM arguments to be followed by ARG_CONST_SIZE holding the size of the memory region. The helpers had to check that size in runtime. There are cases where the size expected by a helper is a compile-time constant. Checking it in runtime is an unnecessary overhead and waste of BPF registers. This commit allows helpers to accept pointers to memory without the corresponding ARG_CONST_SIZE, given that they define the memory region size in struct bpf_func_proto and use ARG_PTR_TO_FIXED_SIZE_MEM type. arg_size is unionized with arg_btf_id to reduce the kernel image size, and it's valid because they are used by different argument types. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-3-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 13 +++++++++++++ kernel/bpf/verifier.c | 43 ++++++++++++++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f3e88afdaffe..a94531971a7a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -401,6 +401,9 @@ enum bpf_type_flag { /* DYNPTR points to a ringbuf record. */ DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + /* Size is known at compile time. */ + MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -464,6 +467,8 @@ enum bpf_arg_type { * all bytes or clear them in error case. */ ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, + /* Pointer to valid memory of size known at compile time. */ + ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -529,6 +534,14 @@ struct bpf_func_proto { u32 *arg5_btf_id; }; u32 *arg_btf_id[5]; + struct { + size_t arg1_size; + size_t arg2_size; + size_t arg3_size; + size_t arg4_size; + size_t arg5_size; + }; + size_t arg_size[5]; }; int *ret_btf_id; /* return value btf_id */ bool (*allowed)(const struct bpf_prog *prog); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eadc23a8452c..2859901ffbe3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5848,6 +5848,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; enum bpf_arg_type arg_type = fn->arg_type[arg]; enum bpf_reg_type type = reg->type; + u32 *arg_btf_id = NULL; int err = 0; if (arg_type == ARG_DONTCARE) @@ -5884,7 +5885,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, */ goto skip_type_check; - err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta); + /* arg_btf_id and arg_size are in a union. */ + if (base_type(arg_type) == ARG_PTR_TO_BTF_ID) + arg_btf_id = fn->arg_btf_id[arg]; + + err = check_reg_type(env, regno, arg_type, arg_btf_id, meta); if (err) return err; @@ -6011,6 +6016,11 @@ skip_type_check: * next is_mem_size argument below. */ meta->raw_mode = arg_type & MEM_UNINIT; + if (arg_type & MEM_FIXED_SIZE) { + err = check_helper_mem_access(env, regno, + fn->arg_size[arg], false, + meta); + } } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); @@ -6400,11 +6410,19 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) return count <= 1; } -static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, - enum bpf_arg_type arg_next) +static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg) { - return (base_type(arg_curr) == ARG_PTR_TO_MEM) != - arg_type_is_mem_size(arg_next); + bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE; + bool has_size = fn->arg_size[arg] != 0; + bool is_next_size = false; + + if (arg + 1 < ARRAY_SIZE(fn->arg_type)) + is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]); + + if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM) + return is_next_size; + + return has_size == is_next_size || is_next_size == is_fixed; } static bool check_arg_pair_ok(const struct bpf_func_proto *fn) @@ -6415,11 +6433,11 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) * helper function specification. */ if (arg_type_is_mem_size(fn->arg1_type) || - base_type(fn->arg5_type) == ARG_PTR_TO_MEM || - check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || - check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || - check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || - check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) + check_args_pair_invalid(fn, 0) || + check_args_pair_invalid(fn, 1) || + check_args_pair_invalid(fn, 2) || + check_args_pair_invalid(fn, 3) || + check_args_pair_invalid(fn, 4)) return false; return true; @@ -6460,7 +6478,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) return false; - if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] && + /* arg_btf_id and arg_size are in a union. */ + (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM || + !(fn->arg_type[i] & MEM_FIXED_SIZE))) return false; } -- cgit v1.2.3 From fa9c562f9735d24c3253747eb21f3f0c0f6de48e Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Wed, 15 Jun 2022 16:39:04 +0800 Subject: net: make xpcs_do_config to accept advertising for pcs-xpcs and sja1105 xpcs_config() has 'advertising' input that is required for C37 1000BASE-X AN in later patch series. So, we prepare xpcs_do_config() for it. For sja1105, xpcs_do_config() is used for xpcs configuration without depending on advertising input, so set to NULL. Reported-by: kernel test robot Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 2 +- drivers/net/pcs/pcs-xpcs.c | 6 +++--- include/linux/pcs/pcs-xpcs.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 72b6fc1932b5..b253e27bcfb4 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2330,7 +2330,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv, else mode = MLO_AN_PHY; - rc = xpcs_do_config(xpcs, priv->phy_mode[i], mode); + rc = xpcs_do_config(xpcs, priv->phy_mode[i], mode, NULL); if (rc < 0) goto out; diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 4cfd05c15aee..48d81c40aab7 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -795,7 +795,7 @@ static int xpcs_config_2500basex(struct dw_xpcs *xpcs) } int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - unsigned int mode) + unsigned int mode, const unsigned long *advertising) { const struct xpcs_compat *compat; int ret; @@ -843,7 +843,7 @@ static int xpcs_config(struct phylink_pcs *pcs, unsigned int mode, { struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); - return xpcs_do_config(xpcs, interface, mode); + return xpcs_do_config(xpcs, interface, mode, advertising); } static int xpcs_get_state_c73(struct dw_xpcs *xpcs, @@ -864,7 +864,7 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs, state->link = 0; - return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND); + return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND, NULL); } if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state, compat)) { diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 266eb26fb029..37eb97cc2283 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -30,7 +30,7 @@ int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - unsigned int mode); + unsigned int mode, const unsigned long *advertising); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -- cgit v1.2.3 From b47aec885bcd672ebca2108a8b7e9ce3e3982775 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Wed, 15 Jun 2022 16:39:06 +0800 Subject: net: pcs: xpcs: add CL37 1000BASE-X AN support For CL37 1000BASE-X AN, DW xPCS does not support C22 method but offers C45 vendor-specific MII MMD for programming. We also add the ability to disable Autoneg (through ethtool for certain network switch that supports 1000BASE-X (1000Mbps and Full-Duplex) but not Autoneg capability. v4: Fixes to comment from Russell King. Thanks! https://patchwork.kernel.org/comment/24894239/ Make xpcs_modify_changed() as private, change to use mdiodev_modify_changed() for cleaner code. v3: Fixes to issues spotted by Russell King. Thanks! https://patchwork.kernel.org/comment/24890210/ Use phylink_mii_c22_pcs_decode_state(), remove unnecessary interrupt clearing and skip speed & duplex setting if AN is enabled. v2: Fixes to issues spotted by Russell King in v1. Thanks! https://patchwork.kernel.org/comment/24826650/ Use phylink_mii_c22_pcs_encode_advertisement() and implement C45 MII ADV handling since IP only support C45 access. Tested-by: Emilio Riva Signed-off-by: Ong Boon Leong Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.c | 170 +++++++++++++++++++++++++++++++++++++++++++ drivers/net/pcs/pcs-xpcs.h | 1 - include/linux/pcs/pcs-xpcs.h | 1 + 3 files changed, 171 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 48d81c40aab7..a5d520e34ea3 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -77,6 +77,14 @@ static const int xpcs_sgmii_features[] = { __ETHTOOL_LINK_MODE_MASK_NBITS, }; +static const int xpcs_1000basex_features[] = { + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_Autoneg_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + static const int xpcs_2500basex_features[] = { ETHTOOL_LINK_MODE_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT, @@ -102,6 +110,10 @@ static const phy_interface_t xpcs_sgmii_interfaces[] = { PHY_INTERFACE_MODE_SGMII, }; +static const phy_interface_t xpcs_1000basex_interfaces[] = { + PHY_INTERFACE_MODE_1000BASEX, +}; + static const phy_interface_t xpcs_2500basex_interfaces[] = { PHY_INTERFACE_MODE_2500BASEX, PHY_INTERFACE_MODE_MAX, @@ -112,6 +124,7 @@ enum { DW_XPCS_10GKR, DW_XPCS_XLGMII, DW_XPCS_SGMII, + DW_XPCS_1000BASEX, DW_XPCS_2500BASEX, DW_XPCS_INTERFACE_MAX, }; @@ -189,6 +202,14 @@ int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val) return mdiobus_c45_write(bus, addr, dev, reg, val); } +static int xpcs_modify_changed(struct dw_xpcs *xpcs, int dev, u32 reg, + u16 mask, u16 set) +{ + u32 reg_addr = mdiobus_c45_addr(dev, reg); + + return mdiodev_modify_changed(xpcs->mdiodev, reg_addr, mask, set); +} + static int xpcs_read_vendor(struct dw_xpcs *xpcs, int dev, u32 reg) { return xpcs_read(xpcs, dev, DW_VENDOR | reg); @@ -237,6 +258,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs, break; case DW_AN_C37_SGMII: case DW_2500BASEX: + case DW_AN_C37_1000BASEX: dev = MDIO_MMD_VEND2; break; default: @@ -772,6 +794,68 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int mode) return ret; } +static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs, unsigned int mode, + const unsigned long *advertising) +{ + phy_interface_t interface = PHY_INTERFACE_MODE_1000BASEX; + int ret, mdio_ctrl, adv; + bool changed = 0; + + /* According to Chap 7.12, to set 1000BASE-X C37 AN, AN must + * be disabled first:- + * 1) VR_MII_MMD_CTRL Bit(12)[AN_ENABLE] = 0b + * 2) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 00b (1000BASE-X C37) + */ + mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL); + if (mdio_ctrl < 0) + return mdio_ctrl; + + if (mdio_ctrl & AN_CL37_EN) { + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, + mdio_ctrl & ~AN_CL37_EN); + if (ret < 0) + return ret; + } + + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL); + if (ret < 0) + return ret; + + ret &= ~DW_VR_MII_PCS_MODE_MASK; + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, ret); + if (ret < 0) + return ret; + + /* Check for advertising changes and update the C45 MII ADV + * register accordingly. + */ + adv = phylink_mii_c22_pcs_encode_advertisement(interface, + advertising); + if (adv >= 0) { + ret = xpcs_modify_changed(xpcs, MDIO_MMD_VEND2, + MII_ADVERTISE, 0xffff, adv); + if (ret < 0) + return ret; + + changed = ret; + } + + /* Clear CL37 AN complete status */ + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0); + if (ret < 0) + return ret; + + if (phylink_autoneg_inband(mode) && + linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, advertising)) { + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, + mdio_ctrl | AN_CL37_EN); + if (ret < 0) + return ret; + } + + return changed; +} + static int xpcs_config_2500basex(struct dw_xpcs *xpcs) { int ret; @@ -817,6 +901,12 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, if (ret) return ret; break; + case DW_AN_C37_1000BASEX: + ret = xpcs_config_aneg_c37_1000basex(xpcs, mode, + advertising); + if (ret) + return ret; + break; case DW_2500BASEX: ret = xpcs_config_2500basex(xpcs); if (ret) @@ -921,6 +1011,29 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, return 0; } +static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs, + struct phylink_link_state *state) +{ + int lpa, bmsr; + + if (state->an_enabled) { + /* Reset link state */ + state->link = false; + + lpa = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_LPA); + if (lpa < 0 || lpa & LPA_RFAULT) + return lpa; + + bmsr = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_BMSR); + if (bmsr < 0) + return bmsr; + + phylink_mii_c22_pcs_decode_state(state, bmsr, lpa); + } + + return 0; +} + static void xpcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state) { @@ -948,6 +1061,13 @@ static void xpcs_get_state(struct phylink_pcs *pcs, ERR_PTR(ret)); } break; + case DW_AN_C37_1000BASEX: + ret = xpcs_get_state_c37_1000basex(xpcs, state); + if (ret) { + pr_err("xpcs_get_state_c37_1000basex returned %pe\n", + ERR_PTR(ret)); + } + break; default: return; } @@ -983,6 +1103,35 @@ static void xpcs_link_up_sgmii(struct dw_xpcs *xpcs, unsigned int mode, pr_err("%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret)); } +static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int mode, + int speed, int duplex) +{ + int val, ret; + + if (phylink_autoneg_inband(mode)) + return; + + switch (speed) { + case SPEED_1000: + val = BMCR_SPEED1000; + break; + case SPEED_100: + case SPEED_10: + default: + pr_err("%s: speed = %d\n", __func__, speed); + return; + } + + if (duplex == DUPLEX_FULL) + val |= BMCR_FULLDPLX; + else + pr_err("%s: half duplex not supported\n", __func__); + + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val); + if (ret) + pr_err("%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret)); +} + void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex) { @@ -992,9 +1141,23 @@ void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode, return xpcs_config_usxgmii(xpcs, speed); if (interface == PHY_INTERFACE_MODE_SGMII) return xpcs_link_up_sgmii(xpcs, mode, speed, duplex); + if (interface == PHY_INTERFACE_MODE_1000BASEX) + return xpcs_link_up_1000basex(xpcs, mode, speed, duplex); } EXPORT_SYMBOL_GPL(xpcs_link_up); +static void xpcs_an_restart(struct phylink_pcs *pcs) +{ + struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); + int ret; + + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1); + if (ret >= 0) { + ret |= BMCR_ANRESTART; + xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, ret); + } +} + static u32 xpcs_get_id(struct dw_xpcs *xpcs) { int ret; @@ -1060,6 +1223,12 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = { .num_interfaces = ARRAY_SIZE(xpcs_sgmii_interfaces), .an_mode = DW_AN_C37_SGMII, }, + [DW_XPCS_1000BASEX] = { + .supported = xpcs_1000basex_features, + .interface = xpcs_1000basex_interfaces, + .num_interfaces = ARRAY_SIZE(xpcs_1000basex_interfaces), + .an_mode = DW_AN_C37_1000BASEX, + }, [DW_XPCS_2500BASEX] = { .supported = xpcs_2500basex_features, .interface = xpcs_2500basex_interfaces, @@ -1115,6 +1284,7 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = { .pcs_validate = xpcs_validate, .pcs_config = xpcs_config, .pcs_get_state = xpcs_get_state, + .pcs_an_restart = xpcs_an_restart, .pcs_link_up = xpcs_link_up, }; diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index 35651d32a224..770df50323a0 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -109,7 +109,6 @@ int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg); int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val); - int nxp_sja1105_sgmii_pma_config(struct dw_xpcs *xpcs); int nxp_sja1110_sgmii_pma_config(struct dw_xpcs *xpcs); int nxp_sja1110_2500basex_pma_config(struct dw_xpcs *xpcs); diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 37eb97cc2283..d2da1e0b4a92 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -17,6 +17,7 @@ #define DW_AN_C73 1 #define DW_AN_C37_SGMII 2 #define DW_2500BASEX 3 +#define DW_AN_C37_1000BASEX 4 struct xpcs_id; -- cgit v1.2.3 From dc368e1c658e4f478a45e8d1d5b0c8392ca87506 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 16 Jun 2022 15:54:07 -0700 Subject: bpf: Fix non-static bpf_func_proto struct definitions This patch does two things: 1) Marks the dynptr bpf_func_proto structs that were added in [1] as static, as pointed out by the kernel test robot in [2]. 2) There are some bpf_func_proto structs marked as extern which can instead be statically defined. [1] https://lore.kernel.org/bpf/20220523210712.3641569-1-joannelkoong@gmail.com/ [2] https://lore.kernel.org/bpf/62ab89f2.Pko7sI08RAKdF8R6%25lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Joanne Koong Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220616225407.1878436-1-joannelkoong@gmail.com --- include/linux/bpf.h | 3 --- kernel/bpf/helpers.c | 12 ++++++------ kernel/bpf/syscall.c | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a94531971a7a..0edd7d2c0064 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2363,12 +2363,9 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; -extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; -extern const struct bpf_func_proto bpf_strncmp_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; -extern const struct bpf_func_proto bpf_kptr_xchg_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 225806a02efb..a1c84d256f83 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -584,7 +584,7 @@ BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) return strncmp(s1, s2, s1_sz); } -const struct bpf_func_proto bpf_strncmp_proto = { +static const struct bpf_func_proto bpf_strncmp_proto = { .func = bpf_strncmp, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1402,7 +1402,7 @@ BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) */ #define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_func_proto bpf_kptr_xchg_proto = { +static const struct bpf_func_proto bpf_kptr_xchg_proto = { .func = bpf_kptr_xchg, .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, @@ -1487,7 +1487,7 @@ error: return err; } -const struct bpf_func_proto bpf_dynptr_from_mem_proto = { +static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .func = bpf_dynptr_from_mem, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1513,7 +1513,7 @@ BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src return 0; } -const struct bpf_func_proto bpf_dynptr_read_proto = { +static const struct bpf_func_proto bpf_dynptr_read_proto = { .func = bpf_dynptr_read, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1539,7 +1539,7 @@ BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, return 0; } -const struct bpf_func_proto bpf_dynptr_write_proto = { +static const struct bpf_func_proto bpf_dynptr_write_proto = { .func = bpf_dynptr_write, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1566,7 +1566,7 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len return (unsigned long)(ptr->data + ptr->offset + offset); } -const struct bpf_func_proto bpf_dynptr_data_proto = { +static const struct bpf_func_proto bpf_dynptr_data_proto = { .func = bpf_dynptr_data, .gpl_only = false, .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index aeb31137b2ed..7d5af5b99f0d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5131,7 +5131,7 @@ BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flag return *res ? 0 : -ENOENT; } -const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { +static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { .func = bpf_kallsyms_lookup_name, .gpl_only = false, .ret_type = RET_INTEGER, -- cgit v1.2.3 From bdb6cfe7512f7a214815a3092f0be50963dcacbc Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 18 Jun 2022 11:28:32 +0100 Subject: net: mii: add mii_bmcr_encode_fixed() Add a function to encode a fixed speed/duplex to a BMCR value. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/mii.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 5ee13083cec7..d5a959ce4877 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -545,4 +545,39 @@ static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv) return cap; } +/** + * mii_bmcr_encode_fixed - encode fixed speed/duplex settings to a BMCR value + * @speed: a SPEED_* value + * @duplex: a DUPLEX_* value + * + * Encode the speed and duplex to a BMCR value. 2500, 1000, 100 and 10 Mbps are + * supported. 2500Mbps is encoded to 1000Mbps. Other speeds are encoded as 10 + * Mbps. Unknown duplex values are encoded to half-duplex. + */ +static inline u16 mii_bmcr_encode_fixed(int speed, int duplex) +{ + u16 bmcr; + + switch (speed) { + case SPEED_2500: + case SPEED_1000: + bmcr = BMCR_SPEED1000; + break; + + case SPEED_100: + bmcr = BMCR_SPEED100; + break; + + case SPEED_10: + default: + bmcr = BMCR_SPEED10; + break; + } + + if (duplex == DUPLEX_FULL) + bmcr |= BMCR_FULLDPLX; + + return bmcr; +} + #endif /* __LINUX_MII_H__ */ -- cgit v1.2.3 From 7b0a0e3c3a88260b6fcb017e49f198463aa62ed1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Apr 2022 16:50:57 +0200 Subject: wifi: cfg80211: do some rework towards MLO link APIs In order to support multi-link operation with multiple links, start adding some APIs. The notable addition here is to have the link ID in a new nl80211 attribute, that will be used to differentiate the links in many nl80211 operations. So far, this patch adds the netlink NL80211_ATTR_MLO_LINK_ID attribute (as well as the NL80211_ATTR_MLO_LINKS attribute) and plugs it through the system in some places, checking the validity etc. along with other infrastructure needed for it. For now, I've decided to include only the over-the-air link ID in the API. I know we discussed that we eventually need to have to have other ways of identifying a link, but for local AP mode and auth/assoc commands as well as set_key etc. we'll use the OTA ID. Also included in this patch is some refactoring of the data structures in struct wireless_dev, splitting for the first time the data into type dependent pieces, to make reasoning about these things easier. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 6 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 9 +- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 4 +- drivers/net/wireless/marvell/libertas/mesh.c | 10 +- drivers/net/wireless/marvell/mwifiex/11h.c | 2 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 18 +- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 3 +- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 14 +- drivers/net/wireless/quantenna/qtnfmac/commands.c | 2 +- drivers/net/wireless/quantenna/qtnfmac/event.c | 15 +- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 4 +- include/linux/ieee80211.h | 3 + include/net/cfg80211.h | 99 +++- include/uapi/linux/nl80211.h | 28 + net/mac80211/cfg.c | 8 +- net/mac80211/mlme.c | 2 +- net/wireless/ap.c | 46 +- net/wireless/chan.c | 196 +++++-- net/wireless/core.c | 28 +- net/wireless/core.h | 13 +- net/wireless/ibss.c | 57 +- net/wireless/mesh.c | 31 +- net/wireless/mlme.c | 74 +-- net/wireless/nl80211.c | 623 +++++++++++++++------ net/wireless/ocb.c | 5 +- net/wireless/rdev-ops.h | 32 +- net/wireless/reg.c | 139 +++-- net/wireless/scan.c | 8 +- net/wireless/sme.c | 102 ++-- net/wireless/trace.h | 86 ++- net/wireless/util.c | 44 +- net/wireless/wext-compat.c | 48 +- net/wireless/wext-sme.c | 29 +- 33 files changed, 1255 insertions(+), 533 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index bd1183830e91..33ed54738d47 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1119,7 +1119,7 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT); mutex_lock(&vif->wdev.mtx); - cfg80211_ch_switch_notify(vif->ndev, &chandef); + cfg80211_ch_switch_notify(vif->ndev, &chandef, 0); mutex_unlock(&vif->wdev.mtx); } @@ -2967,7 +2967,8 @@ static int ath6kl_change_beacon(struct wiphy *wiphy, struct net_device *dev, return ath6kl_set_ies(vif, beacon); } -static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); @@ -3368,6 +3369,7 @@ static int ath6kl_cfg80211_sscan_stop(struct wiphy *wiphy, static int ath6kl_cfg80211_set_bitrate(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 8f2638f5b87b..f93bdffa4d1d 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -2098,8 +2098,8 @@ static int wil_cfg80211_change_beacon(struct wiphy *wiphy, bcon->tail_len)) privacy = 1; - memcpy(vif->ssid, wdev->ssid, wdev->ssid_len); - vif->ssid_len = wdev->ssid_len; + memcpy(vif->ssid, wdev->u.ap.ssid, wdev->u.ap.ssid_len); + vif->ssid_len = wdev->u.ap.ssid_len; /* in case privacy has changed, need to restart the AP */ if (vif->privacy != privacy) { @@ -2108,7 +2108,7 @@ static int wil_cfg80211_change_beacon(struct wiphy *wiphy, rc = _wil_cfg80211_start_ap(wiphy, ndev, vif->ssid, vif->ssid_len, privacy, - wdev->beacon_interval, + wdev->links[0].ap.beacon_interval, vif->channel, vif->wmi_edmg_channel, bcon, vif->hidden_ssid, @@ -2186,7 +2186,8 @@ static int wil_cfg80211_start_ap(struct wiphy *wiphy, } static int wil_cfg80211_stop_ap(struct wiphy *wiphy, - struct net_device *ndev) + struct net_device *ndev, + unsigned int link_id) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); struct wil6210_vif *vif = ndev_to_vif(ndev); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 605206abe424..11e1f07f83e0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4965,7 +4965,8 @@ exit: return err; } -static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev) +static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev, + unsigned int link_id) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_if *ifp = netdev_priv(ndev); @@ -5302,6 +5303,7 @@ exit: static int brcmf_cfg80211_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c index a58c1e141f2c..90ffe8d1e0e8 100644 --- a/drivers/net/wireless/marvell/libertas/mesh.c +++ b/drivers/net/wireless/marvell/libertas/mesh.c @@ -109,9 +109,9 @@ static int lbs_mesh_config(struct lbs_private *priv, uint16_t action, if (priv->mesh_dev) { mesh_wdev = priv->mesh_dev->ieee80211_ptr; - ie->val.mesh_id_len = mesh_wdev->mesh_id_up_len; - memcpy(ie->val.mesh_id, mesh_wdev->ssid, - mesh_wdev->mesh_id_up_len); + ie->val.mesh_id_len = mesh_wdev->u.mesh.id_up_len; + memcpy(ie->val.mesh_id, mesh_wdev->u.mesh.id, + mesh_wdev->u.mesh.id_up_len); } ie->len = sizeof(struct mrvl_meshie_val) - @@ -986,8 +986,8 @@ static int lbs_add_mesh(struct lbs_private *priv) mesh_wdev->wiphy = priv->wdev->wiphy; if (priv->mesh_tlv) { - sprintf(mesh_wdev->ssid, "mesh"); - mesh_wdev->mesh_id_up_len = 4; + sprintf(mesh_wdev->u.mesh.id, "mesh"); + mesh_wdev->u.mesh.id_up_len = 4; } mesh_wdev->netdev = mesh_dev; diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index 3fa25cd64cda..4ca8d0135708 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -304,6 +304,6 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work) mwifiex_dbg(priv->adapter, MSG, "indicating channel switch completion to kernel\n"); mutex_lock(&priv->wdev.mtx); - cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef); + cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef, 0); mutex_unlock(&priv->wdev.mtx); } diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 6f23ec34e2e2..d68c40e0e122 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -1753,10 +1753,12 @@ mwifiex_mgmt_stypes[NUM_NL80211_IFTYPES] = { * Function configures data rates to firmware using bitrate mask * provided by cfg80211. */ -static int mwifiex_cfg80211_set_bitrate_mask(struct wiphy *wiphy, - struct net_device *dev, - const u8 *peer, - const struct cfg80211_bitrate_mask *mask) +static int +mwifiex_cfg80211_set_bitrate_mask(struct wiphy *wiphy, + struct net_device *dev, + unsigned int link_id, + const u8 *peer, + const struct cfg80211_bitrate_mask *mask) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); u16 bitmap_rates[MAX_BITMAP_RATES_SIZE]; @@ -1998,7 +2000,8 @@ mwifiex_cfg80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) /* cfg80211 operation handler for stop ap. * Function stops BSS running at uAP interface. */ -static int mwifiex_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int mwifiex_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); @@ -2421,7 +2424,7 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } - if (priv->wdev.current_bss) { + if (priv->wdev.connected) { mwifiex_dbg(adapter, ERROR, "%s: already connected\n", dev->name); return -EALREADY; @@ -2649,7 +2652,7 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, return -EBUSY; } - if (!priv->wdev.current_bss && priv->scan_block) + if (!priv->wdev.connected && priv->scan_block) priv->scan_block = false; if (!mwifiex_stop_bg_scan(priv)) @@ -4025,6 +4028,7 @@ mwifiex_cfg80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 1ac4684fab25..5c2c7f1dbffd 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1426,7 +1426,8 @@ static int change_beacon(struct wiphy *wiphy, struct net_device *dev, return wilc_add_beacon(vif, 0, 0, beacon); } -static int stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { int ret; struct wilc_vif *vif = netdev_priv(dev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 84b15a655eab..1593e810b3ca 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -352,7 +352,8 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, return ret; } -static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; @@ -500,7 +501,7 @@ qtnf_dump_station(struct wiphy *wiphy, struct net_device *dev, switch (vif->wdev.iftype) { case NL80211_IFTYPE_STATION: - if (idx != 0 || !vif->wdev.current_bss) + if (idx != 0 || !vif->wdev.connected) return -ENOENT; ether_addr_copy(mac, vif->bssid); @@ -729,7 +730,7 @@ qtnf_disconnect(struct wiphy *wiphy, struct net_device *dev, pr_err("VIF%u.%u: failed to disconnect\n", mac->macid, vif->vifid); - if (vif->wdev.current_bss) { + if (vif->wdev.connected) { netif_carrier_off(vif->netdev); cfg80211_disconnected(vif->netdev, reason_code, NULL, 0, true, GFP_KERNEL); @@ -745,10 +746,11 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev, struct qtnf_wmac *mac = wiphy_priv(wiphy); struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_supported_band *sband; - const struct cfg80211_chan_def *chandef = &wdev->chandef; + const struct cfg80211_chan_def *chandef = wdev_chandef(wdev, 0); struct ieee80211_channel *chan; int ret; + sband = wiphy->bands[NL80211_BAND_2GHZ]; if (sband && idx >= sband->n_channels) { idx -= sband->n_channels; @@ -765,7 +767,7 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev, survey->channel = chan; survey->filled = 0x0; - if (chan == chandef->chan) + if (chandef && chan == chandef->chan) survey->filled = SURVEY_INFO_IN_USE; ret = qtnf_cmd_get_chan_stats(mac, chan->center_freq, survey); @@ -778,7 +780,7 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev, static int qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, - struct cfg80211_chan_def *chandef) + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct net_device *ndev = wdev->netdev; struct qtnf_vif *vif; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index c68563c83098..3d734a7a5ba8 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2005,7 +2005,7 @@ int qtnf_cmd_send_scan(struct qtnf_wmac *mac) dwell_active = scan_req->duration; dwell_passive = scan_req->duration; } else if (wdev->iftype == NL80211_IFTYPE_STATION && - wdev->current_bss) { + wdev->connected) { /* let device select dwell based on traffic conditions */ dwell_active = QTNF_SCAN_TIME_AUTO; dwell_passive = QTNF_SCAN_TIME_AUTO; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 8dc80574d08d..4fafe370101a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -189,7 +189,7 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, vif->mac->macid, vif->vifid, join_info->bssid, chandef.chan->hw_value); - if (!vif->wdev.ssid_len) { + if (!vif->wdev.u.client.ssid_len) { pr_warn("VIF%u.%u: SSID unknown for BSS:%pM\n", vif->mac->macid, vif->vifid, join_info->bssid); @@ -197,7 +197,7 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, goto done; } - ie = kzalloc(2 + vif->wdev.ssid_len, GFP_KERNEL); + ie = kzalloc(2 + vif->wdev.u.client.ssid_len, GFP_KERNEL); if (!ie) { pr_warn("VIF%u.%u: IE alloc failed for BSS:%pM\n", vif->mac->macid, vif->vifid, @@ -207,14 +207,15 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, } ie[0] = WLAN_EID_SSID; - ie[1] = vif->wdev.ssid_len; - memcpy(ie + 2, vif->wdev.ssid, vif->wdev.ssid_len); + ie[1] = vif->wdev.u.client.ssid_len; + memcpy(ie + 2, vif->wdev.u.client.ssid, + vif->wdev.u.client.ssid_len); bss = cfg80211_inform_bss(wiphy, chandef.chan, CFG80211_BSS_FTYPE_UNKNOWN, join_info->bssid, 0, WLAN_CAPABILITY_ESS, 100, - ie, 2 + vif->wdev.ssid_len, + ie, 2 + vif->wdev.u.client.ssid_len, 0, GFP_KERNEL); if (!bss) { pr_warn("VIF%u.%u: can't connect to unknown BSS: %pM\n", @@ -470,14 +471,14 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, continue; if (vif->wdev.iftype == NL80211_IFTYPE_STATION && - !vif->wdev.current_bss) + !vif->wdev.connected) continue; if (!vif->netdev) continue; mutex_lock(&vif->wdev.mtx); - cfg80211_ch_switch_notify(vif->netdev, &chandef); + cfg80211_ch_switch_notify(vif->netdev, &chandef, 0); mutex_unlock(&vif->wdev.mtx); } diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 43b5604c0bca..349aa3c4b668 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -2086,6 +2086,7 @@ static u8 rtw_get_chan_type(struct adapter *adapter) } static int cfg80211_rtw_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct adapter *adapter = wiphy_to_adapter(wiphy); @@ -2446,7 +2447,8 @@ static int cfg80211_rtw_change_beacon(struct wiphy *wiphy, struct net_device *nd return rtw_add_beacon(adapter, info->head, info->head_len, info->tail, info->tail_len); } -static int cfg80211_rtw_stop_ap(struct wiphy *wiphy, struct net_device *ndev) +static int cfg80211_rtw_stop_ap(struct wiphy *wiphy, struct net_device *ndev, + unsigned int link_id) { return 0; } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 5c65ae6b8154..e15771965916 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4376,4 +4376,7 @@ enum ieee80211_range_params_max_total_ltf { IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_UNSPECIFIED, }; +/* multi-link device */ +#define IEEE80211_MLD_MAX_NUM_LINKS 15 + #endif /* LINUX_IEEE80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6d02e12e4702..772e099fc932 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1158,6 +1158,7 @@ struct cfg80211_mbssid_elems { /** * struct cfg80211_beacon_data - beacon data + * @link_id: the link ID for the AP MLD link sending this beacon * @head: head portion of beacon (before TIM IE) * or %NULL if not changed * @tail: tail portion of beacon (after TIM IE) @@ -1188,6 +1189,8 @@ struct cfg80211_mbssid_elems { * attribute is present in beacon data or not. */ struct cfg80211_beacon_data { + unsigned int link_id; + const u8 *head, *tail; const u8 *beacon_ies; const u8 *proberesp_ies; @@ -4201,7 +4204,8 @@ struct cfg80211_ops { struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_beacon_data *info); - int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev); + int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id); int (*add_station)(struct wiphy *wiphy, struct net_device *dev, @@ -4309,6 +4313,7 @@ struct cfg80211_ops { int (*set_bitrate_mask)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask); @@ -4384,6 +4389,7 @@ struct cfg80211_ops { int (*get_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef); int (*start_p2p_device)(struct wiphy *wiphy, @@ -4420,6 +4426,7 @@ struct cfg80211_ops { struct cfg80211_qos_map *qos_map); int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, struct cfg80211_chan_def *chandef); int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev, @@ -4545,10 +4552,14 @@ struct cfg80211_ops { * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation * before connection. * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys + * @WIPHY_FLAG_SUPPORTS_MLO: This is a temporary flag gating the MLO APIs, + * in order to not have them reachable in normal drivers, until we have + * complete feature/interface combinations/etc. advertisement. No driver + * should set this flag for now. */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), - /* use hole at 1 */ + WIPHY_FLAG_SUPPORTS_MLO = BIT(1), WIPHY_FLAG_SPLIT_SCAN_6GHZ = BIT(2), WIPHY_FLAG_NETNS_OK = BIT(3), WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), @@ -5505,6 +5516,8 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @netdev: (private) Used to reference back to the netdev, may be %NULL * @identifier: (private) Identifier used in nl80211 to identify this * wireless device if it has no netdev + * @connected_addr: (private) BSSID or AP MLD address if connected + * @connected: indicates if connected or not (STA mode) * @current_bss: (private) Used by the internal configuration code * @chandef: (private) Used by the internal configuration code to track * the user-set channel definition. @@ -5585,8 +5598,6 @@ struct wireless_dev { u8 address[ETH_ALEN] __aligned(sizeof(u16)); /* currently used for IBSS and SME - might be rearranged later */ - u8 ssid[IEEE80211_MAX_SSID_LEN]; - u8 ssid_len, mesh_id_len, mesh_id_up_len; struct cfg80211_conn *conn; struct cfg80211_cached_keys *connect_keys; enum ieee80211_bss_type conn_bss_type; @@ -5598,20 +5609,17 @@ struct wireless_dev { struct list_head event_list; spinlock_t event_lock; - struct cfg80211_internal_bss *current_bss; /* associated / joined */ - struct cfg80211_chan_def preset_chandef; - struct cfg80211_chan_def chandef; + u8 connected:1; bool ps; int ps_timeout; - int beacon_interval; - u32 ap_unexpected_nlportid; u32 owner_nlportid; bool nl_owner_dead; + /* FIXME: need to rework radar detection for MLO */ bool cac_started; unsigned long cac_start_time; unsigned int cac_time_ms; @@ -5639,6 +5647,50 @@ struct wireless_dev { struct work_struct pmsr_free_wk; unsigned long unprot_beacon_reported; + + union { + struct { + u8 connected_addr[ETH_ALEN] __aligned(2); + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } client; + struct { + int beacon_interval; + struct cfg80211_chan_def preset_chandef; + struct cfg80211_chan_def chandef; + u8 id[IEEE80211_MAX_SSID_LEN]; + u8 id_len, id_up_len; + } mesh; + struct { + struct cfg80211_chan_def preset_chandef; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } ap; + struct { + struct cfg80211_internal_bss *current_bss; + struct cfg80211_chan_def chandef; + int beacon_interval; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } ibss; + struct { + struct cfg80211_chan_def chandef; + } ocb; + } u; + + struct { + u8 addr[ETH_ALEN] __aligned(2); + union { + struct { + unsigned int beacon_interval; + struct cfg80211_chan_def chandef; + } ap; + struct { + struct cfg80211_internal_bss *current_bss; + } client; + }; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; + u16 valid_links; }; static inline const u8 *wdev_address(struct wireless_dev *wdev) @@ -5667,6 +5719,31 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } +/** + * wdev_chandef - return chandef pointer from wireless_dev + * @wdev: the wdev + * @link_id: the link ID for MLO + * + * Return: The chandef depending on the mode, or %NULL. + */ +struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, + unsigned int link_id); + +static inline void WARN_INVALID_LINK_ID(struct wireless_dev *wdev, + unsigned int link_id) +{ + WARN_ON(link_id && !wdev->valid_links); + WARN_ON(wdev->valid_links && + !(wdev->valid_links & BIT(link_id))); +} + +#define for_each_valid_link(wdev, link_id) \ + for (link_id = 0; \ + link_id < ((wdev)->valid_links ? ARRAY_SIZE((wdev)->links) : 1); \ + link_id++) \ + if (!(wdev)->valid_links || \ + ((wdev)->valid_links & BIT(link_id))) + /** * DOC: Utility functions * @@ -7882,12 +7959,14 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * cfg80211_ch_switch_notify - update wdev channel and notify userspace * @dev: the device which switched channels * @chandef: the new channel definition + * @link_id: the link ID for MLO, must be 0 for non-MLO * * Caller must acquire wdev_lock, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef); + struct cfg80211_chan_def *chandef, + unsigned int link_id); /* * cfg80211_ch_switch_started_notify - notify channel switch start diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98f905f16411..a9a2c9fef295 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -323,6 +323,17 @@ * Once the association is done, the driver cleans the FILS AAD data. */ +/** + * DOC: Multi-Link Operation + * + * In Multi-Link Operation, a connection between to MLDs utilizes multiple + * links. To use this in nl80211, various commands and responses now need + * to or will include the new %NL80211_ATTR_MLO_LINKS attribute. + * Additionally, various commands that need to operate on a specific link + * now need to be given the %NL80211_ATTR_MLO_LINK_ID attribute, e.g. to + * use %NL80211_CMD_START_AP or similar functions. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -1237,6 +1248,12 @@ * to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to * specify the timeout value. * + * @NL80211_CMD_ADD_LINK: Add a new link to an interface. The + * %NL80211_ATTR_MLO_LINK_ID attribute is used for the new link. + * @NL80211_CMD_REMOVE_LINK: Remove a link from an interface. This may come + * without %NL80211_ATTR_MLO_LINK_ID as an easy way to remove all links + * in preparation for e.g. roaming to a regular (non-MLO) AP. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1481,6 +1498,9 @@ enum nl80211_commands { NL80211_CMD_ASSOC_COMEBACK, + NL80211_CMD_ADD_LINK, + NL80211_CMD_REMOVE_LINK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2663,6 +2683,11 @@ enum nl80211_commands { * association request when used with NL80211_CMD_NEW_STATION). Can be set * only if %NL80211_STA_FLAG_WME is set. * + * @NL80211_ATTR_MLO_LINK_ID: A (u8) link ID for use with MLO, to be used with + * various commands that need a link ID to operate. + * @NL80211_ATTR_MLO_LINKS: A nested array of links, each containing some + * per-link information and a link ID. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3177,6 +3202,9 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_EHT, + NL80211_ATTR_MLO_LINKS, + NL80211_ATTR_MLO_LINK_ID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 881efbfb96f6..362cac9e2135 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1342,7 +1342,8 @@ static void ieee80211_free_next_beacon(struct ieee80211_sub_if_data *sdata) sdata->u.ap.next_beacon = NULL; } -static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_sub_if_data *vlan; @@ -3049,6 +3050,7 @@ static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { @@ -3390,7 +3392,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) if (err) return err; - cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef, 0); return 0; } @@ -3898,6 +3900,7 @@ unlock: static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -3958,6 +3961,7 @@ static int ieee80211_set_qos_map(struct wiphy *wiphy, static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6d5ad71ef02c..e0a9b7d63071 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1314,7 +1314,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) return; } - cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); + cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef, 0); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 550ac9d827fe..e68923200018 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -1,4 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * Parts of this file are + * Copyright (C) 2022 Intel Corporation + */ #include #include #include @@ -7,8 +11,9 @@ #include "rdev-ops.h" -int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify) +static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, unsigned int link_id, + bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -22,15 +27,16 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - if (!wdev->beacon_interval) + if (!wdev->links[link_id].ap.beacon_interval) return -ENOENT; - err = rdev_stop_ap(rdev, dev); + err = rdev_stop_ap(rdev, dev, link_id); if (!err) { wdev->conn_owner_nlportid = 0; - wdev->beacon_interval = 0; - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); - wdev->ssid_len = 0; + wdev->links[link_id].ap.beacon_interval = 0; + memset(&wdev->links[link_id].ap.chandef, 0, + sizeof(wdev->links[link_id].ap.chandef)); + wdev->u.ap.ssid_len = 0; rdev_set_qos_map(rdev, dev, NULL); if (notify) nl80211_send_ap_stopped(wdev); @@ -46,14 +52,36 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, int link_id, + bool notify) +{ + unsigned int link; + int ret = 0; + + if (link_id >= 0) + return ___cfg80211_stop_ap(rdev, dev, link_id, notify); + + for_each_valid_link(dev->ieee80211_ptr, link) { + int ret1 = ___cfg80211_stop_ap(rdev, dev, link, notify); + + if (ret1) + ret = ret1; + /* try the next one also if one errored */ + } + + return ret; +} + int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify) + struct net_device *dev, int link_id, + bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_stop_ap(rdev, dev, notify); + err = __cfg80211_stop_ap(rdev, dev, link_id, notify); wdev_unlock(wdev); return err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index f74f176e0d9d..efc2de4bab57 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -672,14 +672,21 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, * range of chandef. */ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, - struct ieee80211_channel *chan) + struct ieee80211_channel *chan, + bool primary_only) { int width; u32 freq; + if (!chandef->chan) + return false; + if (chandef->chan->center_freq == chan->center_freq) return true; + if (primary_only) + return false; + width = cfg80211_chandef_get_width(chandef); if (width <= 20) return false; @@ -704,23 +711,25 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) { - bool active = false; + unsigned int link; ASSERT_WDEV_LOCK(wdev); - if (!wdev->chandef.chan) - return false; - switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - active = wdev->beacon_interval != 0; + for_each_valid_link(wdev, link) { + if (wdev->links[link].ap.beacon_interval) + return true; + } break; case NL80211_IFTYPE_ADHOC: - active = wdev->ssid_len != 0; + if (wdev->u.ibss.ssid_len) + return true; break; case NL80211_IFTYPE_MESH_POINT: - active = wdev->mesh_id_len != 0; + if (wdev->u.mesh.id_len) + return true; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_OCB: @@ -737,7 +746,35 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) WARN_ON(1); } - return active; + return false; +} + +bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, + struct ieee80211_channel *chan, + bool primary_only) +{ + unsigned int link; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + for_each_valid_link(wdev, link) { + if (cfg80211_is_sub_chan(&wdev->links[link].ap.chandef, + chan, primary_only)) + return true; + } + break; + case NL80211_IFTYPE_ADHOC: + return cfg80211_is_sub_chan(&wdev->u.ibss.chandef, chan, + primary_only); + case NL80211_IFTYPE_MESH_POINT: + return cfg80211_is_sub_chan(&wdev->u.mesh.chandef, chan, + primary_only); + default: + break; + } + + return false; } static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, @@ -752,7 +789,7 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, continue; } - if (cfg80211_is_sub_chan(&wdev->chandef, chan)) { + if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) { wdev_unlock(wdev); return true; } @@ -772,7 +809,8 @@ cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev, if (!cfg80211_chandef_valid(&rdev->background_radar_chandef)) return false; - return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel); + return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel, + false); } bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, @@ -1176,6 +1214,68 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_usable); +static bool cfg80211_ir_permissive_check_wdev(enum nl80211_iftype iftype, + struct wireless_dev *wdev, + struct ieee80211_channel *chan) +{ + struct ieee80211_channel *other_chan = NULL; + unsigned int link_id; + int r1, r2; + + for_each_valid_link(wdev, link_id) { + if (wdev->iftype == NL80211_IFTYPE_STATION && + wdev->links[link_id].client.current_bss) + other_chan = wdev->links[link_id].client.current_bss->pub.channel; + + /* + * If a GO already operates on the same GO_CONCURRENT channel, + * this one (maybe the same one) can beacon as well. We allow + * the operation even if the station we relied on with + * GO_CONCURRENT is disconnected now. But then we must make sure + * we're not outdoor on an indoor-only channel. + */ + if (iftype == NL80211_IFTYPE_P2P_GO && + wdev->iftype == NL80211_IFTYPE_P2P_GO && + wdev->links[link_id].ap.beacon_interval && + !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) + other_chan = wdev->links[link_id].ap.chandef.chan; + + if (!other_chan) + continue; + + if (chan == other_chan) + return true; + + if (chan->band != NL80211_BAND_5GHZ && + chan->band != NL80211_BAND_6GHZ) + continue; + + r1 = cfg80211_get_unii(chan->center_freq); + r2 = cfg80211_get_unii(other_chan->center_freq); + + if (r1 != -EINVAL && r1 == r2) { + /* + * At some locations channels 149-165 are considered a + * bundle, but at other locations, e.g., Indonesia, + * channels 149-161 are considered a bundle while + * channel 165 is left out and considered to be in a + * different bundle. Thus, in case that there is a + * station interface connected to an AP on channel 165, + * it is assumed that channels 149-161 are allowed for + * GO operations. However, having a station interface + * connected to an AP on channels 149-161, does not + * allow GO operation on channel 165. + */ + if (chan->center_freq == 5825 && + other_chan->center_freq != 5825) + continue; + return true; + } + } + + return false; +} + /* * Check if the channel can be used under permissive conditions mandated by * some regulatory bodies, i.e., the channel is marked with @@ -1219,59 +1319,14 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, * the current registered device. */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - struct ieee80211_channel *other_chan = NULL; - int r1, r2; + bool ret; wdev_lock(wdev); - if (wdev->iftype == NL80211_IFTYPE_STATION && - wdev->current_bss) - other_chan = wdev->current_bss->pub.channel; - - /* - * If a GO already operates on the same GO_CONCURRENT channel, - * this one (maybe the same one) can beacon as well. We allow - * the operation even if the station we relied on with - * GO_CONCURRENT is disconnected now. But then we must make sure - * we're not outdoor on an indoor-only channel. - */ - if (iftype == NL80211_IFTYPE_P2P_GO && - wdev->iftype == NL80211_IFTYPE_P2P_GO && - wdev->beacon_interval && - !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) - other_chan = wdev->chandef.chan; + ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan); wdev_unlock(wdev); - if (!other_chan) - continue; - - if (chan == other_chan) - return true; - - if (chan->band != NL80211_BAND_5GHZ && - chan->band != NL80211_BAND_6GHZ) - continue; - - r1 = cfg80211_get_unii(chan->center_freq); - r2 = cfg80211_get_unii(other_chan->center_freq); - - if (r1 != -EINVAL && r1 == r2) { - /* - * At some locations channels 149-165 are considered a - * bundle, but at other locations, e.g., Indonesia, - * channels 149-161 are considered a bundle while - * channel 165 is left out and considered to be in a - * different bundle. Thus, in case that there is a - * station interface connected to an AP on channel 165, - * it is assumed that channels 149-161 are allowed for - * GO operations. However, having a station interface - * connected to an AP on channels 149-161, does not - * allow GO operation on channel 165. - */ - if (chan->center_freq == 5825 && - other_chan->center_freq != 5825) - continue; - return true; - } + if (ret) + return ret; } return false; @@ -1374,3 +1429,24 @@ bool cfg80211_any_usable_channels(struct wiphy *wiphy, return false; } EXPORT_SYMBOL(cfg80211_any_usable_channels); + +struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, + unsigned int link_id) +{ + ASSERT_WDEV_LOCK(wdev); + + switch (wdev->iftype) { + case NL80211_IFTYPE_MESH_POINT: + return &wdev->u.mesh.chandef; + case NL80211_IFTYPE_ADHOC: + return &wdev->u.ibss.chandef; + case NL80211_IFTYPE_OCB: + return &wdev->u.ocb.chandef; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + return &wdev->links[link_id].ap.chandef; + default: + return NULL; + } +} +EXPORT_SYMBOL(wdev_chandef); diff --git a/net/wireless/core.c b/net/wireless/core.c index f08d4b3bb148..3e5d12040726 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1118,6 +1118,7 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, bool unregister_netdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + unsigned int link_id; ASSERT_RTNL(); lockdep_assert_held(&rdev->wiphy.mtx); @@ -1167,11 +1168,22 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, */ cfg80211_process_wdev_events(wdev); - if (WARN_ON(wdev->current_bss)) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; + if (wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) { + for (link_id = 0; link_id < ARRAY_SIZE(wdev->links); link_id++) { + struct cfg80211_internal_bss *curbss; + + curbss = wdev->links[link_id].client.current_bss; + + if (WARN_ON(curbss)) { + cfg80211_unhold_bss(curbss); + cfg80211_put_bss(wdev->wiphy, &curbss->pub); + wdev->links[link_id].client.current_bss = NULL; + } + } } + + wdev->connected = false; } void cfg80211_unregister_wdev(struct wireless_dev *wdev) @@ -1233,7 +1245,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, dev, true); + __cfg80211_stop_ap(rdev, dev, -1, true); break; case NL80211_IFTYPE_OCB: __cfg80211_leave_ocb(rdev, dev); @@ -1463,9 +1475,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, memcpy(&setup, &default_mesh_setup, sizeof(setup)); /* back compat only needed for mesh_id */ - setup.mesh_id = wdev->ssid; - setup.mesh_id_len = wdev->mesh_id_up_len; - if (wdev->mesh_id_up_len) + setup.mesh_id = wdev->u.mesh.id; + setup.mesh_id_len = wdev->u.mesh.id_up_len; + if (wdev->u.mesh.id_up_len) __cfg80211_join_mesh(rdev, dev, &setup, &default_mesh_config); diff --git a/net/wireless/core.h b/net/wireless/core.h index 5436ada91b1a..2c195067ddff 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -307,6 +307,7 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + unsigned int link, struct ieee80211_channel *channel); /* IBSS */ @@ -353,9 +354,11 @@ int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, /* AP */ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify); + struct net_device *dev, int link, + bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify); + struct net_device *dev, int link, + bool notify); /* MLME */ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, @@ -507,7 +510,11 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev); bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, - struct ieee80211_channel *chan); + struct ieee80211_channel *chan, + bool primary_only); +bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, + struct ieee80211_channel *chan, + bool primary_only); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 5d89eec2869a..4935f94d1acc 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -28,7 +28,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return; - if (!wdev->ssid_len) + if (!wdev->u.ibss.ssid_len) return; bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, NULL, 0, @@ -37,13 +37,13 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, if (WARN_ON(!bss)) return; - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + if (wdev->u.ibss.current_bss) { + cfg80211_unhold_bss(wdev->u.ibss.current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub); } cfg80211_hold_bss(bss_from_pub(bss)); - wdev->current_bss = bss_from_pub(bss); + wdev->u.ibss.current_bss = bss_from_pub(bss); if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) cfg80211_upload_connect_keys(wdev); @@ -96,7 +96,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, lockdep_assert_held(&rdev->wiphy.mtx); ASSERT_WDEV_LOCK(wdev); - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) return -EALREADY; if (!params->basic_rates) { @@ -131,7 +131,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, kfree_sensitive(wdev->connect_keys); wdev->connect_keys = connkeys; - wdev->chandef = params->chandef; + wdev->u.ibss.chandef = params->chandef; if (connkeys) { params->wep_keys = connkeys->params; params->wep_tx_key = connkeys->def; @@ -146,8 +146,8 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, return err; } - memcpy(wdev->ssid, params->ssid, params->ssid_len); - wdev->ssid_len = params->ssid_len; + memcpy(wdev->u.ibss.ssid, params->ssid, params->ssid_len); + wdev->u.ibss.ssid_len = params->ssid_len; return 0; } @@ -173,14 +173,14 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) for (i = 0; i < 6; i++) rdev_del_key(rdev, dev, i, false, NULL); - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + if (wdev->u.ibss.current_bss) { + cfg80211_unhold_bss(wdev->u.ibss.current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub); } - wdev->current_bss = NULL; - wdev->ssid_len = 0; - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); + wdev->u.ibss.current_bss = NULL; + wdev->u.ibss.ssid_len = 0; + memset(&wdev->u.ibss.chandef, 0, sizeof(wdev->u.ibss.chandef)); #ifdef CONFIG_CFG80211_WEXT if (!nowext) wdev->wext.ibss.ssid_len = 0; @@ -205,7 +205,7 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (!wdev->ssid_len) + if (!wdev->u.ibss.ssid_len) return -ENOLINK; err = rdev_leave_ibss(rdev, dev); @@ -339,7 +339,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev_lock(wdev); err = 0; - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) err = __cfg80211_leave_ibss(rdev, dev, true); wdev_unlock(wdev); @@ -374,8 +374,8 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, return -EINVAL; wdev_lock(wdev); - if (wdev->current_bss) - chan = wdev->current_bss->pub.channel; + if (wdev->u.ibss.current_bss) + chan = wdev->u.ibss.current_bss->pub.channel; else if (wdev->wext.ibss.chandef.chan) chan = wdev->wext.ibss.chandef.chan; wdev_unlock(wdev); @@ -408,7 +408,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, wdev_lock(wdev); err = 0; - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) err = __cfg80211_leave_ibss(rdev, dev, true); wdev_unlock(wdev); @@ -419,8 +419,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - memcpy(wdev->ssid, ssid, len); - wdev->wext.ibss.ssid = wdev->ssid; + memcpy(wdev->u.ibss.ssid, ssid, len); + wdev->wext.ibss.ssid = wdev->u.ibss.ssid; wdev->wext.ibss.ssid_len = len; wdev_lock(wdev); @@ -443,10 +443,10 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->flags = 0; wdev_lock(wdev); - if (wdev->ssid_len) { + if (wdev->u.ibss.ssid_len) { data->flags = 1; - data->length = wdev->ssid_len; - memcpy(ssid, wdev->ssid, data->length); + data->length = wdev->u.ibss.ssid_len; + memcpy(ssid, wdev->u.ibss.ssid, data->length); } else if (wdev->wext.ibss.ssid && wdev->wext.ibss.ssid_len) { data->flags = 1; data->length = wdev->wext.ibss.ssid_len; @@ -494,7 +494,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, wdev_lock(wdev); err = 0; - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) err = __cfg80211_leave_ibss(rdev, dev, true); wdev_unlock(wdev); @@ -527,8 +527,9 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; wdev_lock(wdev); - if (wdev->current_bss) - memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); + if (wdev->u.ibss.current_bss) + memcpy(ap_addr->sa_data, wdev->u.ibss.current_bss->pub.bssid, + ETH_ALEN); else if (wdev->wext.ibss.bssid) memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); else diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index e4e363138279..59a3c5c092b1 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,4 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * Portions + * Copyright (C) 2022 Intel Corporation + */ #include #include #include @@ -114,7 +118,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, setup->is_secure) return -EOPNOTSUPP; - if (wdev->mesh_id_len) + if (wdev->u.mesh.id_len) return -EALREADY; if (!setup->mesh_id_len) @@ -125,7 +129,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!setup->chandef.chan) { /* if no channel explicitly given, use preset channel */ - setup->chandef = wdev->preset_chandef; + setup->chandef = wdev->u.mesh.preset_chandef; } if (!setup->chandef.chan) { @@ -209,10 +213,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, err = rdev_join_mesh(rdev, dev, conf, setup); if (!err) { - memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); - wdev->mesh_id_len = setup->mesh_id_len; - wdev->chandef = setup->chandef; - wdev->beacon_interval = setup->beacon_interval; + memcpy(wdev->u.mesh.id, setup->mesh_id, setup->mesh_id_len); + wdev->u.mesh.id_len = setup->mesh_id_len; + wdev->u.mesh.chandef = setup->chandef; + wdev->u.mesh.beacon_interval = setup->beacon_interval; } return err; @@ -241,15 +245,15 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev, chandef->chan); if (!err) - wdev->chandef = *chandef; + wdev->u.mesh.chandef = *chandef; return err; } - if (wdev->mesh_id_len) + if (wdev->u.mesh.id_len) return -EBUSY; - wdev->preset_chandef = *chandef; + wdev->u.mesh.preset_chandef = *chandef; return 0; } @@ -267,15 +271,16 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, if (!rdev->ops->leave_mesh) return -EOPNOTSUPP; - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) return -ENOTCONN; err = rdev_leave_mesh(rdev, dev); if (!err) { wdev->conn_owner_nlportid = 0; - wdev->mesh_id_len = 0; - wdev->beacon_interval = 0; - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); + wdev->u.mesh.id_len = 0; + wdev->u.mesh.beacon_interval = 0; + memset(&wdev->u.mesh.chandef, 0, + sizeof(wdev->u.mesh.chandef)); rdev_set_qos_map(rdev, dev, NULL); cfg80211_sched_dfs_chan_update(rdev); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c8155a483ec2..fab2d6206cdd 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -92,8 +92,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev, nl80211_send_deauth(rdev, wdev->netdev, buf, len, reconnect, GFP_KERNEL); - if (!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) + if (!wdev->connected || !ether_addr_equal(wdev->u.client.connected_addr, bssid)) return; __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); @@ -113,8 +112,8 @@ static void cfg80211_process_disassoc(struct wireless_dev *wdev, nl80211_send_disassoc(rdev, wdev->netdev, buf, len, reconnect, GFP_KERNEL); - if (WARN_ON(!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + if (WARN_ON(!wdev->connected || + !ether_addr_equal(wdev->u.client.connected_addr, bssid))) return; __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); @@ -260,8 +259,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, if (!key || !key_len || key_idx < 0 || key_idx > 3) return -EINVAL; - if (wdev->current_bss && - ether_addr_equal(bssid, wdev->current_bss->pub.bssid)) + if (wdev->connected && + ether_addr_equal(bssid, wdev->u.client.connected_addr)) return -EALREADY; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, @@ -322,9 +321,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (wdev->current_bss && - (!req->prev_bssid || !ether_addr_equal(wdev->current_bss->pub.bssid, - req->prev_bssid))) + if (wdev->connected && + (!req->prev_bssid || + !ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid))) return -EALREADY; cfg80211_oper_and_ht_capa(&req->ht_capa_mask, @@ -364,13 +363,13 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); if (local_state_change && - (!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + (!wdev->connected || + !ether_addr_equal(wdev->u.client.connected_addr, bssid))) return 0; if (ether_addr_equal(wdev->disconnect_bssid, bssid) || - (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + (wdev->connected && + ether_addr_equal(wdev->u.client.connected_addr, bssid))) wdev->conn_owner_nlportid = 0; return rdev_deauth(rdev, dev, &req); @@ -392,11 +391,12 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (!wdev->current_bss) + if (!wdev->connected) return -ENOTCONN; - if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) - req.bss = &wdev->current_bss->pub; + if (ether_addr_equal(wdev->links[0].client.current_bss->pub.bssid, + bssid)) + req.bss = &wdev->links[0].client.current_bss->pub; else return -ENOTCONN; @@ -405,7 +405,7 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, return err; /* driver should have reported the disassoc */ - WARN_ON(wdev->current_bss); + WARN_ON(wdev->connected); return 0; } @@ -420,10 +420,10 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, if (!rdev->ops->deauth) return; - if (!wdev->current_bss) + if (!wdev->connected) return; - memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); + memcpy(bssid, wdev->u.client.connected_addr, ETH_ALEN); cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); } @@ -676,28 +676,34 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: + /* + * check for IBSS DA must be done by driver as + * cfg80211 doesn't track the stations + */ + if (!wdev->u.ibss.current_bss || + !ether_addr_equal(wdev->u.ibss.current_bss->pub.bssid, + mgmt->bssid)) { + err = -ENOTCONN; + break; + } + break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (!wdev->current_bss) { + if (!wdev->connected) { err = -ENOTCONN; break; } - if (!ether_addr_equal(wdev->current_bss->pub.bssid, + /* FIXME: MLD may address this differently */ + + if (!ether_addr_equal(wdev->u.client.connected_addr, mgmt->bssid)) { err = -ENOTCONN; break; } - /* - * check for IBSS DA must be done by driver as - * cfg80211 doesn't track the stations - */ - if (wdev->iftype == NL80211_IFTYPE_ADHOC) - break; - /* for station, check that DA is the AP */ - if (!ether_addr_equal(wdev->current_bss->pub.bssid, + if (!ether_addr_equal(wdev->u.client.connected_addr, mgmt->da)) { err = -ENOTCONN; break; @@ -743,12 +749,12 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, if (!ieee80211_is_action(mgmt->frame_control) || mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) return -EINVAL; - if (!wdev->current_bss && + if (!wdev->connected && !wiphy_ext_feature_isset( &rdev->wiphy, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) return -EINVAL; - if (wdev->current_bss && + if (wdev->connected && !wiphy_ext_feature_isset( &rdev->wiphy, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) @@ -940,12 +946,16 @@ void cfg80211_cac_event(struct net_device *netdev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; + /* not yet supported */ + if (wdev->valid_links) + return; + trace_cfg80211_cac_event(netdev, event); if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED)) return; - if (WARN_ON(!wdev->chandef.chan)) + if (WARN_ON(!wdev->links[0].ap.chandef.chan)) return; switch (event) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 740b29481bc6..af31978fc9cc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -792,6 +792,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NL80211_EHT_MIN_CAPABILITY_LEN, NL80211_EHT_MAX_CAPABILITY_LEN), [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG }, + [NL80211_ATTR_MLO_LINKS] = + NLA_POLICY_NESTED_ARRAY(nl80211_policy), + [NL80211_ATTR_MLO_LINK_ID] = + NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS), }; /* policy for the key attributes */ @@ -1225,6 +1229,37 @@ static bool nl80211_put_txq_stats(struct sk_buff *msg, /* netlink command implementations */ +/** + * nl80211_link_id - return link ID + * @attrs: attributes to look at + * + * Returns: the link ID or 0 if not given + * + * Note this function doesn't do any validation of the link + * ID validity wrt. links that were actually added, so it must + * be called only from ops with %NL80211_FLAG_MLO_VALID_LINK_ID + * or if additional validation is done. + */ +static unsigned int nl80211_link_id(struct nlattr **attrs) +{ + struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID]; + + if (!linkid) + return 0; + + return nla_get_u8(linkid); +} + +static int nl80211_link_id_or_invalid(struct nlattr **attrs) +{ + struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID]; + + if (!linkid) + return -1; + + return nla_get_u8(linkid); +} + struct key_parse { struct key_params p; int idx; @@ -1496,11 +1531,15 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: + if (wdev->u.ibss.current_bss) + return 0; + return -ENOLINK; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (!wdev->current_bss) - return -ENOLINK; - break; + /* for MLO, require driver validation of the link ID */ + if (wdev->connected) + return 0; + return -ENOLINK; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: @@ -3232,12 +3271,14 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct genl_info *info) + struct genl_info *info, + int _link_id) { struct cfg80211_chan_def chandef; int result; enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; struct wireless_dev *wdev = NULL; + int link_id = _link_id; if (dev) wdev = dev->ieee80211_ptr; @@ -3246,6 +3287,12 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, if (wdev) iftype = wdev->iftype; + if (link_id < 0) { + if (wdev && wdev->valid_links) + return -EINVAL; + link_id = 0; + } + result = nl80211_parse_chandef(rdev, info, &chandef); if (result) return result; @@ -3254,49 +3301,48 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, - iftype)) { - result = -EINVAL; - break; - } - if (wdev->beacon_interval) { + iftype)) + return -EINVAL; + if (wdev->links[link_id].ap.beacon_interval) { + struct ieee80211_channel *cur_chan; + if (!dev || !rdev->ops->set_ap_chanwidth || !(rdev->wiphy.features & - NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) { - result = -EBUSY; - break; - } + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) + return -EBUSY; /* Only allow dynamic channel width changes */ - if (chandef.chan != wdev->preset_chandef.chan) { - result = -EBUSY; - break; - } - result = rdev_set_ap_chanwidth(rdev, dev, &chandef); + cur_chan = wdev->links[link_id].ap.chandef.chan; + if (chandef.chan != cur_chan) + return -EBUSY; + + result = rdev_set_ap_chanwidth(rdev, dev, link_id, + &chandef); if (result) - break; + return result; + wdev->links[link_id].ap.chandef = chandef; + } else { + wdev->u.ap.preset_chandef = chandef; } - wdev->preset_chandef = chandef; - result = 0; - break; + return 0; case NL80211_IFTYPE_MESH_POINT: - result = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - break; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); case NL80211_IFTYPE_MONITOR: - result = cfg80211_set_monitor_channel(rdev, &chandef); - break; + return cfg80211_set_monitor_channel(rdev, &chandef); default: - result = -EINVAL; + break; } - return result; + return -EINVAL; } static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + int link_id = nl80211_link_id_or_invalid(info->attrs); struct net_device *netdev = info->user_ptr[1]; - return __nl80211_set_channel(rdev, netdev, info); + return __nl80211_set_channel(rdev, netdev, info, link_id); } static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) @@ -3411,7 +3457,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = __nl80211_set_channel( rdev, nl80211_can_set_dev_channel(wdev) ? netdev : NULL, - info); + info, -1); if (result) goto out; } @@ -3696,15 +3742,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr)) goto nla_put_failure; - if (rdev->ops->get_channel) { - int ret; + if (rdev->ops->get_channel && !wdev->valid_links) { struct cfg80211_chan_def chandef = {}; + int ret; - ret = rdev_get_channel(rdev, wdev, &chandef); - if (ret == 0) { - if (nl80211_send_chandef(msg, &chandef)) - goto nla_put_failure; - } + ret = rdev_get_channel(rdev, wdev, 0, &chandef); + if (ret == 0 && nl80211_send_chandef(msg, &chandef)) + goto nla_put_failure; } if (rdev->ops->get_tx_power) { @@ -3721,27 +3765,24 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->ssid_len && - nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) + if (wdev->u.ap.ssid_len && + nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len, + wdev->u.ap.ssid)) goto nla_put_failure_locked; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - case NL80211_IFTYPE_ADHOC: { - const struct element *ssid_elem; - - if (!wdev->current_bss) - break; - rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, - WLAN_EID_SSID); - if (ssid_elem && - nla_put(msg, NL80211_ATTR_SSID, ssid_elem->datalen, - ssid_elem->data)) - goto nla_put_failure_rcu_locked; - rcu_read_unlock(); + if (wdev->u.client.ssid_len && + nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len, + wdev->u.client.ssid)) + goto nla_put_failure_locked; + break; + case NL80211_IFTYPE_ADHOC: + if (wdev->u.ibss.ssid_len && + nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len, + wdev->u.ibss.ssid)) + goto nla_put_failure_locked; break; - } default: /* nothing */ break; @@ -3761,8 +3802,6 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag genlmsg_end(msg, hdr); return 0; - nla_put_failure_rcu_locked: - rcu_read_unlock(); nla_put_failure_locked: wdev_unlock(wdev); nla_put_failure: @@ -4014,10 +4053,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); - wdev->mesh_id_up_len = + wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), - wdev->mesh_id_up_len); + memcpy(wdev->u.mesh.id, + nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->u.mesh.id_up_len); wdev_unlock(wdev); } @@ -4122,10 +4162,11 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); - wdev->mesh_id_up_len = + wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), - wdev->mesh_id_up_len); + memcpy(wdev->u.mesh.id, + nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->u.mesh.id_up_len); wdev_unlock(wdev); break; case NL80211_IFTYPE_NAN: @@ -4662,7 +4703,7 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - if (!dev->ieee80211_ptr->beacon_interval) + if (!dev->ieee80211_ptr->links[0].ap.beacon_interval) return -EINVAL; acl = parse_acl_data(&rdev->wiphy, info); @@ -4818,14 +4859,24 @@ static void he_build_mcs_mask(u16 he_mcs_map, } } -static u16 he_get_txmcsmap(struct genl_info *info, +static u16 he_get_txmcsmap(struct genl_info *info, unsigned int link_id, const struct ieee80211_sta_he_cap *he_cap) { struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - __le16 tx_mcs; + struct cfg80211_chan_def *chandef; + __le16 tx_mcs; - switch (wdev->chandef.width) { + chandef = wdev_chandef(wdev, link_id); + if (!chandef) { + /* + * This is probably broken, but we never maintained + * a chandef in these cases, so it always was. + */ + return le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_80); + } + + switch (chandef->width) { case NL80211_CHAN_WIDTH_80P80: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80p80; break; @@ -4836,6 +4887,7 @@ static u16 he_get_txmcsmap(struct genl_info *info, tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80; break; } + return le16_to_cpu(tx_mcs); } @@ -4843,7 +4895,8 @@ static bool he_set_mcs_mask(struct genl_info *info, struct wireless_dev *wdev, struct ieee80211_supported_band *sband, struct nl80211_txrate_he *txrate, - u16 mcs[NL80211_HE_NSS_MAX]) + u16 mcs[NL80211_HE_NSS_MAX], + unsigned int link_id) { const struct ieee80211_sta_he_cap *he_cap; u16 tx_mcs_mask[NL80211_HE_NSS_MAX] = {}; @@ -4856,7 +4909,7 @@ static bool he_set_mcs_mask(struct genl_info *info, memset(mcs, 0, sizeof(u16) * NL80211_HE_NSS_MAX); - tx_mcs_map = he_get_txmcsmap(info, he_cap); + tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); /* Build he_mcs_mask from HE capabilities */ he_build_mcs_mask(tx_mcs_map, tx_mcs_mask); @@ -4876,7 +4929,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, enum nl80211_attrs attr, struct cfg80211_bitrate_mask *mask, struct net_device *dev, - bool default_all_enabled) + bool default_all_enabled, + unsigned int link_id) { struct nlattr *tb[NL80211_TXRATE_MAX + 1]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4913,7 +4967,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, if (!he_cap) continue; - he_tx_mcs_map = he_get_txmcsmap(info, he_cap); + he_tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); he_build_mcs_mask(he_tx_mcs_map, mask->control[i].he_mcs); mask->control[i].he_gi = 0xFF; @@ -4978,7 +5032,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, if (tb[NL80211_TXRATE_HE] && !he_set_mcs_mask(info, wdev, sband, nla_data(tb[NL80211_TXRATE_HE]), - mask->control[band].he_mcs)) + mask->control[band].he_mcs, + link_id)) return -EINVAL; if (tb[NL80211_TXRATE_HE_GI]) @@ -5215,6 +5270,8 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, memset(bcn, 0, sizeof(*bcn)); + bcn->link_id = nl80211_link_id(attrs); + if (attrs[NL80211_ATTR_BEACON_HEAD]) { bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); @@ -5468,22 +5525,20 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct cfg80211_ap_settings *params) { struct wireless_dev *wdev; - bool ret = false; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) continue; - if (!wdev->preset_chandef.chan) + if (!wdev->u.ap.preset_chandef.chan) continue; - params->chandef = wdev->preset_chandef; - ret = true; - break; + params->chandef = wdev->u.ap.preset_chandef; + return true; } - return ret; + return false; } static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, @@ -5541,6 +5596,7 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings *params; @@ -5553,7 +5609,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->start_ap) return -EOPNOTSUPP; - if (wdev->beacon_interval) + if (wdev->links[link_id].ap.beacon_interval) return -EALREADY; /* these are required for START_AP */ @@ -5595,6 +5651,18 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out; } + + if (wdev->u.ap.ssid_len && + (wdev->u.ap.ssid_len != params->ssid_len || + memcmp(wdev->u.ap.ssid, params->ssid, params->ssid_len))) { + /* require identical SSID for MLO */ + err = -EINVAL; + goto out; + } + } else if (wdev->valid_links) { + /* require SSID for MLO */ + err = -EINVAL; + goto out; } if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) @@ -5662,8 +5730,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_chandef(rdev, info, ¶ms->chandef); if (err) goto out; - } else if (wdev->preset_chandef.chan) { - params->chandef = wdev->preset_chandef; + } else if (wdev->valid_links) { + /* with MLD need to specify the channel configuration */ + err = -EINVAL; + goto out; + } else if (wdev->u.ap.preset_chandef.chan) { + params->chandef = wdev->u.ap.preset_chandef; } else if (!nl80211_get_ap_channel(rdev, params)) { err = -EINVAL; goto out; @@ -5679,7 +5751,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, ¶ms->beacon_rate, - dev, false); + dev, false, link_id); if (err) goto out; @@ -5779,19 +5851,28 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; wdev_lock(wdev); + if (wdev->conn_owner_nlportid && + info->attrs[NL80211_ATTR_SOCKET_OWNER] && + wdev->conn_owner_nlportid != info->snd_portid) { + err = -EINVAL; + goto out_unlock; + } + + /* FIXME: validate MLO/link-id against driver capabilities */ + err = rdev_start_ap(rdev, dev, params); if (!err) { - wdev->preset_chandef = params->chandef; - wdev->beacon_interval = params->beacon_interval; - wdev->chandef = params->chandef; - wdev->ssid_len = params->ssid_len; - memcpy(wdev->ssid, params->ssid, wdev->ssid_len); + wdev->links[link_id].ap.beacon_interval = params->beacon_interval; + wdev->links[link_id].ap.chandef = params->chandef; + wdev->u.ap.ssid_len = params->ssid_len; + memcpy(wdev->u.ap.ssid, params->ssid, + params->ssid_len); if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->conn_owner_nlportid = info->snd_portid; } +out_unlock: wdev_unlock(wdev); - out: kfree(params->acl); kfree(params->beacon.mbssid_ies); @@ -5807,6 +5888,7 @@ out: static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_beacon_data params; @@ -5819,7 +5901,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->change_beacon) return -EOPNOTSUPP; - if (!wdev->beacon_interval) + if (!wdev->links[link_id].ap.beacon_interval) return -EINVAL; err = nl80211_parse_beacon(rdev, info->attrs, ¶ms); @@ -5838,9 +5920,10 @@ out: static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; - return cfg80211_stop_ap(rdev, dev, false); + return cfg80211_stop_ap(rdev, dev, link_id, false); } static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { @@ -7590,7 +7673,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, wdev_lock(wdev); /* If not connected, get default parameters */ - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); else err = rdev_get_mesh_config(rdev, dev, &cur_params); @@ -7971,7 +8054,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, return err; wdev_lock(wdev); - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) err = -ENOLINK; if (!err) @@ -8463,14 +8546,44 @@ int nl80211_parse_random_mac(struct nlattr **attrs, return 0; } -static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev) +static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, + struct ieee80211_channel *chan) { + unsigned int link_id; + bool all_ok = true; + ASSERT_WDEV_LOCK(wdev); if (!cfg80211_beaconing_iface_active(wdev)) return true; - if (!(wdev->chandef.chan->flags & IEEE80211_CHAN_RADAR)) + /* + * FIXME: check if we have a free HW resource/link for chan + * + * This, as well as the FIXME below, requires knowing the link + * capabilities of the hardware. + */ + + /* we cannot leave radar channels */ + for_each_valid_link(wdev, link_id) { + struct cfg80211_chan_def *chandef; + + chandef = wdev_chandef(wdev, link_id); + if (!chandef) + continue; + + /* + * FIXME: don't require all_ok, but rather check only the + * correct HW resource/link onto which 'chan' falls, + * as only that link leaves the channel for doing + * the off-channel operation. + */ + + if (chandef->chan->flags & IEEE80211_CHAN_RADAR) + all_ok = false; + } + + if (all_ok) return true; return regulatory_pre_cac_allowed(wdev->wiphy); @@ -8553,7 +8666,7 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, int err; if (!(wiphy->features & randomness_flag) || - (wdev && wdev->current_bss)) + (wdev && wdev->connected)) return -EOPNOTSUPP; err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask); @@ -8690,17 +8803,14 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; wdev_lock(wdev); - if (!cfg80211_off_channel_oper_allowed(wdev)) { - struct ieee80211_channel *chan; + for (i = 0; i < request->n_channels; i++) { + struct ieee80211_channel *chan = request->channels[i]; - if (request->n_channels != 1) { - wdev_unlock(wdev); - err = -EBUSY; - goto out_free; - } + /* if we can go off-channel to the target channel we're good */ + if (cfg80211_off_channel_oper_allowed(wdev, chan)) + continue; - chan = request->channels[0]; - if (chan->center_freq != wdev->chandef.chan->center_freq) { + if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) { wdev_unlock(wdev); err = -EBUSY; goto out_free; @@ -9445,7 +9555,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms); if (!err) { - wdev->chandef = chandef; + wdev->links[0].ap.chandef = chandef; wdev->cac_started = true; wdev->cac_start_time = jiffies; wdev->cac_time_ms = cac_time_ms; @@ -9513,6 +9623,7 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb, static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_csa_settings params; @@ -9539,15 +9650,15 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) need_handle_dfs_flag = false; /* useless if AP is not running */ - if (!wdev->beacon_interval) + if (!wdev->links[link_id].ap.beacon_interval) return -ENOTCONN; break; case NL80211_IFTYPE_ADHOC: - if (!wdev->ssid_len) + if (!wdev->u.ibss.ssid_len) return -ENOTCONN; break; case NL80211_IFTYPE_MESH_POINT: - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) return -ENOTCONN; break; default: @@ -9718,6 +9829,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, { struct cfg80211_bss *res = &intbss->pub; const struct cfg80211_bss_ies *ies; + unsigned int link_id; void *hdr; struct nlattr *bss; @@ -9822,13 +9934,15 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, switch (wdev->iftype) { case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - if (intbss == wdev->current_bss && - nla_put_u32(msg, NL80211_BSS_STATUS, - NL80211_BSS_STATUS_ASSOCIATED)) - goto nla_put_failure; + for_each_valid_link(wdev, link_id) { + if (intbss == wdev->links[link_id].client.current_bss && + nla_put_u32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_ASSOCIATED)) + goto nla_put_failure; + } break; case NL80211_IFTYPE_ADHOC: - if (intbss == wdev->current_bss && + if (intbss == wdev->u.ibss.current_bss && nla_put_u32(msg, NL80211_BSS_STATUS, NL80211_BSS_STATUS_IBSS_JOINED)) goto nla_put_failure; @@ -11362,7 +11476,7 @@ static int nl80211_update_connect_params(struct sk_buff *skb, } wdev_lock(dev->ieee80211_ptr); - if (!wdev->current_bss) + if (!wdev->connected) ret = -ENOLINK; else ret = rdev_update_connect_params(rdev, dev, &connect, changed); @@ -11575,9 +11689,9 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_chan_def chandef; - const struct cfg80211_chan_def *compat_chandef; struct sk_buff *msg; void *hdr; u64 cookie; @@ -11607,10 +11721,22 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, return err; wdev_lock(wdev); - if (!cfg80211_off_channel_oper_allowed(wdev) && - !cfg80211_chandef_identical(&wdev->chandef, &chandef)) { - compat_chandef = cfg80211_chandef_compatible(&wdev->chandef, - &chandef); + if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { + const struct cfg80211_chan_def *oper_chandef, *compat_chandef; + + oper_chandef = wdev_chandef(wdev, link_id); + + if (WARN_ON(!oper_chandef)) { + /* cannot happen since we must beacon to get here */ + WARN_ON(1); + wdev_unlock(wdev); + return -EBUSY; + } + + /* note: returns first one if identical chandefs */ + compat_chandef = cfg80211_chandef_compatible(&chandef, + oper_chandef); + if (compat_chandef != &chandef) { wdev_unlock(wdev); return -EBUSY; @@ -11672,6 +11798,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_bitrate_mask mask; + unsigned int link_id = nl80211_link_id(info->attrs); struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -11683,11 +11810,11 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, - dev, true); + dev, true, link_id); if (err) goto out; - err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + err = rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask); out: wdev_unlock(wdev); return err; @@ -11812,7 +11939,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return -EINVAL; wdev_lock(wdev); - if (params.offchan && !cfg80211_off_channel_oper_allowed(wdev)) { + if (params.offchan && + !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { wdev_unlock(wdev); return -EBUSY; } @@ -12030,12 +12158,13 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, * connection is established and enough beacons received to calculate * the average. */ - if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && + if (!wdev->cqm_config->last_rssi_event_value && + wdev->links[0].client.current_bss && rdev->ops->get_station) { struct station_info sinfo = {}; u8 *mac_addr; - mac_addr = wdev->current_bss->pub.bssid; + mac_addr = wdev->links[0].client.current_bss->pub.bssid; err = rdev_get_station(rdev, dev, mac_addr, &sinfo); if (err) @@ -12298,7 +12427,7 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &setup.beacon_rate, - dev, false); + dev, false, 0); if (err) return err; @@ -13268,7 +13397,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]); wdev_lock(wdev); - if (!wdev->current_bss) { + if (!wdev->connected) { err = -ENOTCONN; goto out; } @@ -14537,7 +14666,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->current_bss) + if (wdev->connected) break; err = -ENOTCONN; goto out; @@ -14710,13 +14839,13 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info) return -EINVAL; wdev_lock(wdev); - if (!wdev->current_bss) { + if (!wdev->connected) { ret = -ENOTCONN; goto out; } pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (memcmp(pmk_conf.aa, wdev->current_bss->pub.bssid, ETH_ALEN)) { + if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) { ret = -EINVAL; goto out; } @@ -14844,9 +14973,13 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: + if (wdev->u.ibss.current_bss) + break; + err = -ENOTCONN; + goto out; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->current_bss) + if (wdev->connected) break; err = -ENOTCONN; goto out; @@ -14882,12 +15015,14 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ftm_responder_stats ftm_stats = {}; + unsigned int link_id = nl80211_link_id(info->attrs); struct sk_buff *msg; void *hdr; struct nlattr *ftm_stats_attr; int err; - if (wdev->iftype != NL80211_IFTYPE_AP || !wdev->beacon_interval) + if (wdev->iftype != NL80211_IFTYPE_AP || + !wdev->links[link_id].ap.beacon_interval) return -EOPNOTSUPP; err = rdev_get_ftm_responder_stats(rdev, dev, &ftm_stats); @@ -15017,7 +15152,8 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) static int parse_tid_conf(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct net_device *dev, struct cfg80211_tid_cfg *tid_conf, - struct genl_info *info, const u8 *peer) + struct genl_info *info, const u8 *peer, + unsigned int link_id) { struct netlink_ext_ack *extack = info->extack; u64 mask; @@ -15092,7 +15228,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, attr = NL80211_TID_CONFIG_ATTR_TX_RATE; err = nl80211_parse_tx_bitrate_mask(info, attrs, attr, &tid_conf->txrate_mask, dev, - true); + true, link_id); if (err) return err; @@ -15119,6 +15255,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb, { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct cfg80211_tid_config *tid_config; struct nlattr *tid; @@ -15156,7 +15293,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb, ret = parse_tid_conf(rdev, attrs, dev, &tid_config->tid_conf[conf_idx], - info, tid_config->peer); + info, tid_config->peer, link_id); if (ret) goto bad_tid_conf; @@ -15295,6 +15432,62 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, return rdev_set_fils_aad(rdev, dev, &fils_aad); } +static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) +{ + unsigned int link_id = nl80211_link_id(info->attrs); + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (!(wdev->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + return -EINVAL; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + break; + default: + return -EINVAL; + } + + if (!info->attrs[NL80211_ATTR_MAC] || + !is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC]))) + return -EINVAL; + + wdev_lock(wdev); + wdev->valid_links |= BIT(link_id); + ether_addr_copy(wdev->links[link_id].addr, + nla_data(info->attrs[NL80211_ATTR_MAC])); + wdev_unlock(wdev); + + return 0; +} + +static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info) +{ + unsigned int link_id = nl80211_link_id(info->attrs); + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + /* cannot remove if there's no link */ + if (!info->attrs[NL80211_ATTR_MLO_LINK_ID]) + return -EINVAL; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + break; + default: + return -EINVAL; + } + + /* FIXME: stop the link operations first */ + + wdev_lock(wdev); + wdev->valid_links &= ~BIT(link_id); + eth_zero_addr(wdev->links[link_id].addr); + wdev_unlock(wdev); + + return 0; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -15307,6 +15500,8 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, NL80211_FLAG_CHECK_NETDEV_UP) #define NL80211_FLAG_CLEAR_SKB 0x20 #define NL80211_FLAG_NO_WIPHY_MTX 0x40 +#define NL80211_FLAG_MLO_VALID_LINK_ID 0x80 +#define NL80211_FLAG_MLO_UNSUPPORTED 0x100 #define INTERNAL_FLAG_SELECTORS(__sel) \ SELECTOR(__sel, NONE, 0) /* must be first */ \ @@ -15316,6 +15511,12 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, NL80211_FLAG_NEED_WDEV) \ SELECTOR(__sel, NETDEV, \ NL80211_FLAG_NEED_NETDEV) \ + SELECTOR(__sel, NETDEV_LINK, \ + NL80211_FLAG_NEED_NETDEV | \ + NL80211_FLAG_MLO_VALID_LINK_ID) \ + SELECTOR(__sel, NETDEV_NO_MLO, \ + NL80211_FLAG_NEED_NETDEV | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, WIPHY_RTNL, \ NL80211_FLAG_NEED_WIPHY | \ NL80211_FLAG_NEED_RTNL) \ @@ -15331,14 +15532,31 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, NETDEV_UP, \ NL80211_FLAG_NEED_NETDEV_UP) \ + SELECTOR(__sel, NETDEV_UP_LINK, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_MLO_VALID_LINK_ID) \ + SELECTOR(__sel, NETDEV_UP_NO_MLO, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ + SELECTOR(__sel, NETDEV_UP_NO_MLO_CLEAR, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_CLEAR_SKB | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, NETDEV_UP_NOTMX, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_NO_WIPHY_MTX) \ + SELECTOR(__sel, NETDEV_UP_NOTMX_NOMLO, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_NO_WIPHY_MTX | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, NETDEV_UP_CLEAR, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_CLEAR_SKB) \ SELECTOR(__sel, WDEV_UP, \ NL80211_FLAG_NEED_WDEV_UP) \ + SELECTOR(__sel, WDEV_UP_LINK, \ + NL80211_FLAG_NEED_WDEV_UP | \ + NL80211_FLAG_MLO_VALID_LINK_ID) \ SELECTOR(__sel, WDEV_UP_RTNL, \ NL80211_FLAG_NEED_WDEV_UP | \ NL80211_FLAG_NEED_RTNL) \ @@ -15362,9 +15580,10 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = NULL; - struct wireless_dev *wdev; - struct net_device *dev; + struct wireless_dev *wdev = NULL; + struct net_device *dev = NULL; u32 internal_flags; + int err; if (WARN_ON(ops->internal_flags >= ARRAY_SIZE(nl80211_internal_flags))) return -EINVAL; @@ -15375,8 +15594,8 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, if (internal_flags & NL80211_FLAG_NEED_WIPHY) { rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { - rtnl_unlock(); - return PTR_ERR(rdev); + err = PTR_ERR(rdev); + goto out_unlock; } info->user_ptr[0] = rdev; } else if (internal_flags & NL80211_FLAG_NEED_NETDEV || @@ -15384,17 +15603,18 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { - rtnl_unlock(); - return PTR_ERR(wdev); + err = PTR_ERR(wdev); + goto out_unlock; } dev = wdev->netdev; + dev_hold(dev); rdev = wiphy_to_rdev(wdev->wiphy); if (internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { - rtnl_unlock(); - return -EINVAL; + err = -EINVAL; + goto out_unlock; } info->user_ptr[1] = dev; @@ -15404,14 +15624,44 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, if (internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !wdev_running(wdev)) { - rtnl_unlock(); - return -ENETDOWN; + err = -ENETDOWN; + goto out_unlock; } - dev_hold(dev); info->user_ptr[0] = rdev; } + if (internal_flags & NL80211_FLAG_MLO_VALID_LINK_ID) { + struct nlattr *link_id = info->attrs[NL80211_ATTR_MLO_LINK_ID]; + + if (!wdev) { + err = -EINVAL; + goto out_unlock; + } + + /* MLO -> require valid link ID */ + if (wdev->valid_links && + (!link_id || + !(wdev->valid_links & BIT(nla_get_u16(link_id))))) { + err = -EINVAL; + goto out_unlock; + } + + /* non-MLO -> no link ID attribute accepted */ + if (!wdev->valid_links && link_id) { + err = -EINVAL; + goto out_unlock; + } + } + + if (internal_flags & NL80211_FLAG_MLO_UNSUPPORTED) { + if (info->attrs[NL80211_ATTR_MLO_LINK_ID] || + (wdev && wdev->valid_links)) { + err = -EINVAL; + goto out_unlock; + } + } + if (rdev && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { wiphy_lock(&rdev->wiphy); /* we keep the mutex locked until post_doit */ @@ -15421,6 +15671,10 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, rtnl_unlock(); return 0; +out_unlock: + rtnl_unlock(); + dev_put(dev); + return err; } static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, @@ -15636,6 +15890,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, + /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on key */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, @@ -15659,21 +15914,24 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_START_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_STOP_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_GET_STATION, @@ -15939,7 +16197,9 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), + /* FIXME: requiring a link ID here is probably not good */ + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, @@ -15953,7 +16213,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_tx_bitrate_mask, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_REGISTER_FRAME, @@ -16002,7 +16263,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_JOIN_MESH, @@ -16163,7 +16425,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mac_acl, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_RADAR_DETECT, @@ -16171,7 +16434,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NO_WIPHY_MTX), + NL80211_FLAG_NO_WIPHY_MTX | + NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, @@ -16217,7 +16481,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_VENDOR, @@ -16240,7 +16505,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_add_tx_ts, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_DEL_TX_TS, @@ -16301,7 +16567,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_ftm_responder_stats, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, @@ -16333,7 +16600,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_SET_TID_CONFIG, .doit = nl80211_set_tid_config, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_SET_SAR_SPECS, @@ -16357,6 +16625,19 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, + { + .cmd = NL80211_CMD_ADD_LINK, + .doit = nl80211_add_link, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, + { + .cmd = NL80211_CMD_REMOVE_LINK, + .doit = nl80211_remove_link, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -17984,23 +18265,37 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, } void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + unsigned int link_id) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_WDEV_LOCK(wdev); + WARN_INVALID_LINK_ID(wdev, link_id); - trace_cfg80211_ch_switch_notify(dev, chandef); - - wdev->chandef = *chandef; - wdev->preset_chandef = *chandef; + trace_cfg80211_ch_switch_notify(dev, chandef, link_id); - if ((wdev->iftype == NL80211_IFTYPE_STATION || - wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && - !WARN_ON(!wdev->current_bss)) - cfg80211_update_assoc_bss_entry(wdev, chandef->chan); + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + if (!WARN_ON(!wdev->links[link_id].client.current_bss)) + cfg80211_update_assoc_bss_entry(wdev, link_id, + chandef->chan); + break; + case NL80211_IFTYPE_MESH_POINT: + wdev->u.mesh.chandef = *chandef; + wdev->u.mesh.preset_chandef = *chandef; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + wdev->links[link_id].ap.chandef = *chandef; + break; + default: + WARN_ON(1); + break; + } cfg80211_sched_dfs_chan_update(rdev); diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c index 2d26a6d980bf..27a1732264f9 100644 --- a/net/wireless/ocb.c +++ b/net/wireless/ocb.c @@ -4,6 +4,7 @@ * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research + * Copyright (C) 2022 Intel Corporation * Author: Rostislav Lisovy * Funded by: Volkswagen Group Research */ @@ -34,7 +35,7 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, err = rdev_join_ocb(rdev, dev, setup); if (!err) - wdev->chandef = setup->chandef; + wdev->u.ocb.chandef = setup->chandef; return err; } @@ -69,7 +70,7 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, err = rdev_leave_ocb(rdev, dev); if (!err) - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); + memset(&wdev->u.ocb.chandef, 0, sizeof(wdev->u.ocb.chandef)); return err; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 439bcf52369c..d2300eff03ae 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright(c) 2016-2017 Intel Deutschland GmbH + * Copyright (C) 2018, 2021-2022 Intel Corporation + */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -172,11 +177,11 @@ static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev, } static inline int rdev_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev) + struct net_device *dev, unsigned int link_id) { int ret; - trace_rdev_stop_ap(&rdev->wiphy, dev); - ret = rdev->ops->stop_ap(&rdev->wiphy, dev); + trace_rdev_stop_ap(&rdev->wiphy, dev, link_id); + ret = rdev->ops->stop_ap(&rdev->wiphy, dev, link_id); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -651,12 +656,14 @@ static inline int rdev_testmode_dump(struct cfg80211_registered_device *rdev, static inline int rdev_set_bitrate_mask(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *peer, + struct net_device *dev, unsigned int link_id, + const u8 *peer, const struct cfg80211_bitrate_mask *mask) { int ret; - trace_rdev_set_bitrate_mask(&rdev->wiphy, dev, peer, mask); - ret = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, peer, mask); + trace_rdev_set_bitrate_mask(&rdev->wiphy, dev, link_id, peer, mask); + ret = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, link_id, + peer, mask); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -944,12 +951,13 @@ static inline int rdev_set_noack_map(struct cfg80211_registered_device *rdev, static inline int rdev_get_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { int ret; - trace_rdev_get_channel(&rdev->wiphy, wdev); - ret = rdev->ops->get_channel(&rdev->wiphy, wdev, chandef); + trace_rdev_get_channel(&rdev->wiphy, wdev, link_id); + ret = rdev->ops->get_channel(&rdev->wiphy, wdev, link_id, chandef); trace_rdev_return_chandef(&rdev->wiphy, ret, chandef); return ret; @@ -1107,12 +1115,14 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, static inline int rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct cfg80211_chan_def *chandef) + struct net_device *dev, + unsigned int link_id, + struct cfg80211_chan_def *chandef) { int ret; - trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef); - ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef); + trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef); + ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef); trace_rdev_return_int(&rdev->wiphy, ret); return ret; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 58e83ce642ad..c7383ede794f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -5,7 +5,7 @@ * Copyright 2008-2011 Luis R. Rodriguez * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -2370,6 +2370,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; bool ret; + int link; wdev_lock(wdev); iftype = wdev->iftype; @@ -2378,62 +2379,83 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) if (!wdev->netdev || !netif_running(wdev->netdev)) goto wdev_inactive_unlock; - switch (iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_MESH_POINT: - if (!wdev->beacon_interval) - goto wdev_inactive_unlock; - chandef = wdev->chandef; - break; - case NL80211_IFTYPE_ADHOC: - if (!wdev->ssid_len) - goto wdev_inactive_unlock; - chandef = wdev->chandef; - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - if (!wdev->current_bss || - !wdev->current_bss->pub.channel) - goto wdev_inactive_unlock; - - if (!rdev->ops->get_channel || - rdev_get_channel(rdev, wdev, &chandef)) - cfg80211_chandef_create(&chandef, - wdev->current_bss->pub.channel, - NL80211_CHAN_NO_HT); - break; - case NL80211_IFTYPE_MONITOR: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_DEVICE: - /* no enforcement required */ - break; - default: - /* others not implemented for now */ - WARN_ON(1); - break; - } + for (link = 0; link < ARRAY_SIZE(wdev->links); link++) { + struct ieee80211_channel *chan; - wdev_unlock(wdev); + if (!wdev->valid_links && link > 0) + break; + if (!(wdev->valid_links & BIT(link))) + continue; + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + if (!wdev->u.mesh.beacon_interval) + continue; + chandef = wdev->u.mesh.chandef; + break; + case NL80211_IFTYPE_ADHOC: + if (!wdev->u.ibss.ssid_len) + continue; + chandef = wdev->u.ibss.chandef; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + /* Maybe we could consider disabling that link only? */ + if (!wdev->links[link].client.current_bss) + continue; - switch (iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - wiphy_lock(wiphy); - ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); - wiphy_unlock(wiphy); + chan = wdev->links[link].client.current_bss->pub.channel; + if (!chan) + continue; - return ret; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - return cfg80211_chandef_usable(wiphy, &chandef, - IEEE80211_CHAN_DISABLED); - default: - break; + if (!rdev->ops->get_channel || + rdev_get_channel(rdev, wdev, link, &chandef)) + cfg80211_chandef_create(&chandef, chan, + NL80211_CHAN_NO_HT); + break; + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_DEVICE: + /* no enforcement required */ + break; + default: + /* others not implemented for now */ + WARN_ON(1); + break; + } + + wdev_unlock(wdev); + + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + wiphy_lock(wiphy); + ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, + iftype); + wiphy_unlock(wiphy); + + if (!ret) + return ret; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + ret = cfg80211_chandef_usable(wiphy, &chandef, + IEEE80211_CHAN_DISABLED); + if (!ret) + return ret; + break; + default: + break; + } + + wdev_lock(wdev); } + wdev_unlock(wdev); + return true; wdev_inactive_unlock: @@ -4215,8 +4237,17 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) * In both cases we should end the CAC on the wdev. */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - if (wdev->cac_started && - !cfg80211_chandef_dfs_usable(&rdev->wiphy, &wdev->chandef)) + struct cfg80211_chan_def *chandef; + + if (!wdev->cac_started) + continue; + + /* FIXME: radar detection is tied to link 0 for now */ + chandef = wdev_chandef(wdev, 0); + if (!chandef) + continue; + + if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef)) rdev_end_cac(rdev, wdev->netdev); } } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 6d82bd9eaf8c..0134e5d5c81a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -5,7 +5,7 @@ * Copyright 2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include #include @@ -2617,7 +2617,8 @@ void cfg80211_bss_iter(struct wiphy *wiphy, spin_lock_bh(&rdev->bss_lock); list_for_each_entry(bss, &rdev->bss_list, list) { - if (!chandef || cfg80211_is_sub_chan(chandef, bss->pub.channel)) + if (!chandef || cfg80211_is_sub_chan(chandef, bss->pub.channel, + false)) iter(wiphy, &bss->pub, iter_data); } @@ -2626,11 +2627,12 @@ void cfg80211_bss_iter(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_bss_iter); void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + unsigned int link_id, struct ieee80211_channel *chan) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - struct cfg80211_internal_bss *cbss = wdev->current_bss; + struct cfg80211_internal_bss *cbss = wdev->links[link_id].client.current_bss; struct cfg80211_internal_bss *new = NULL; struct cfg80211_internal_bss *bss; struct cfg80211_bss *nontrans_bss; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ff4d48fcbfb2..35602201057b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg - * Copyright (C) 2009, 2020 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -454,6 +454,20 @@ void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +static void cfg80211_wdev_release_bsses(struct wireless_dev *wdev) +{ + unsigned int link; + + for_each_valid_link(wdev, link) { + if (!wdev->links[link].client.current_bss) + continue; + cfg80211_unhold_bss(wdev->links[link].client.current_bss); + cfg80211_put_bss(wdev->wiphy, + &wdev->links[link].client.current_bss->pub); + wdev->links[link].client.current_bss = NULL; + } +} + static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) @@ -521,12 +535,11 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; + cfg80211_wdev_release_bsses(wdev); + if (wdev->connected) { cfg80211_sme_free(wdev); + wdev->connected = false; } if (wdev->conn) @@ -563,8 +576,8 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, wdev->conn->auto_auth = false; } - wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = wdev->ssid_len; + wdev->conn->params.ssid = wdev->u.client.ssid; + wdev->conn->params.ssid_len = wdev->u.client.ssid_len; /* see if we have the bss already */ bss = cfg80211_get_conn_bss(wdev); @@ -648,7 +661,7 @@ static bool cfg80211_is_all_idle(void) list_for_each_entry(rdev, &cfg80211_rdev_list, list) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); - if (wdev->conn || wdev->current_bss || + if (wdev->conn || wdev->connected || cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; wdev_unlock(wdev); @@ -668,7 +681,6 @@ static void disconnect_work(struct work_struct *work) DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); - /* * API calls for drivers implementing connect/disconnect and * SME event handling @@ -729,23 +741,19 @@ void __cfg80211_connect_result(struct net_device *dev, if (!cr->bss && (cr->status == WLAN_STATUS_SUCCESS)) { WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); cr->bss = cfg80211_get_bss(wdev->wiphy, NULL, cr->bssid, - wdev->ssid, wdev->ssid_len, + wdev->u.client.ssid, wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); if (cr->bss) cfg80211_hold_bss(bss_from_pub(cr->bss)); } - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } + cfg80211_wdev_release_bsses(wdev); if (cr->status != WLAN_STATUS_SUCCESS) { kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; - wdev->ssid_len = 0; + wdev->u.client.ssid_len = 0; wdev->conn_owner_nlportid = 0; if (cr->bss) { cfg80211_unhold_bss(bss_from_pub(cr->bss)); @@ -758,7 +766,9 @@ void __cfg80211_connect_result(struct net_device *dev, if (WARN_ON(!cr->bss)) return; - wdev->current_bss = bss_from_pub(cr->bss); + wdev->links[0].client.current_bss = bss_from_pub(cr->bss); + wdev->connected = true; + ether_addr_copy(wdev->u.client.connected_addr, cr->bss->bssid); if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) cfg80211_upload_connect_keys(wdev); @@ -801,7 +811,7 @@ void cfg80211_connect_done(struct net_device *dev, found = cfg80211_get_bss(wdev->wiphy, NULL, params->bss->bssid, - wdev->ssid, wdev->ssid_len, + wdev->u.client.ssid, wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); if (found) { @@ -906,18 +916,17 @@ void __cfg80211_roamed(struct wireless_dev *wdev, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) goto out; - if (WARN_ON(!wdev->current_bss)) + if (WARN_ON(!wdev->connected)) goto out; - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; + cfg80211_wdev_release_bsses(wdev); if (WARN_ON(!info->bss)) return; cfg80211_hold_bss(bss_from_pub(info->bss)); - wdev->current_bss = bss_from_pub(info->bss); + wdev->links[0].client.current_bss = bss_from_pub(info->bss); + ether_addr_copy(wdev->u.client.connected_addr, info->bss->bssid); wdev->unprot_beacon_reported = 0; nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), @@ -963,8 +972,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, if (!info->bss) { info->bss = cfg80211_get_bss(wdev->wiphy, info->channel, - info->bssid, wdev->ssid, - wdev->ssid_len, + info->bssid, wdev->u.client.ssid, + wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); } @@ -1034,8 +1043,8 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return; - if (WARN_ON(!wdev->current_bss) || - WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + if (WARN_ON(!wdev->connected) || + WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, bssid))) return; nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, @@ -1087,13 +1096,9 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - } - - wdev->current_bss = NULL; - wdev->ssid_len = 0; + cfg80211_wdev_release_bsses(wdev); + wdev->connected = false; + wdev->u.client.ssid_len = 0; wdev->conn_owner_nlportid = 0; kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; @@ -1182,19 +1187,20 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, * already connected, so reject a new SSID unless it's the * same (which is the case for re-association.) */ - if (wdev->ssid_len && - (wdev->ssid_len != connect->ssid_len || - memcmp(wdev->ssid, connect->ssid, wdev->ssid_len))) + if (wdev->u.client.ssid_len && + (wdev->u.client.ssid_len != connect->ssid_len || + memcmp(wdev->u.client.ssid, connect->ssid, wdev->u.client.ssid_len))) return -EALREADY; /* * If connected, reject (re-)association unless prev_bssid * matches the current BSSID. */ - if (wdev->current_bss) { + if (wdev->connected) { if (!prev_bssid) return -EALREADY; - if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) + if (!ether_addr_equal(prev_bssid, + wdev->u.client.connected_addr)) return -ENOTCONN; } @@ -1245,8 +1251,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, } wdev->connect_keys = connkeys; - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; + memcpy(wdev->u.client.ssid, connect->ssid, connect->ssid_len); + wdev->u.client.ssid_len = connect->ssid_len; wdev->conn_bss_type = connect->pbss ? IEEE80211_BSS_TYPE_PBSS : IEEE80211_BSS_TYPE_ESS; @@ -1262,8 +1268,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, * This could be reassoc getting refused, don't clear * ssid_len in that case. */ - if (!wdev->current_bss) - wdev->ssid_len = 0; + if (!wdev->connected) + wdev->u.client.ssid_len = 0; return err; } @@ -1287,7 +1293,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, err = cfg80211_sme_disconnect(wdev, reason); else if (!rdev->ops->disconnect) cfg80211_mlme_down(rdev, dev); - else if (wdev->ssid_len) + else if (wdev->u.client.ssid_len) err = rdev_disconnect(rdev, dev, reason); /* @@ -1295,8 +1301,8 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, * in which case cfg80211_disconnected() will take care of * this later. */ - if (!wdev->current_bss) - wdev->ssid_len = 0; + if (!wdev->connected) + wdev->u.client.ssid_len = 0; return err; } @@ -1320,7 +1326,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, wdev->netdev, false); + __cfg80211_stop_ap(rdev, wdev->netdev, -1, false); break; case NL80211_IFTYPE_MESH_POINT: __cfg80211_leave_mesh(rdev, wdev->netdev); @@ -1332,7 +1338,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) * ops->disconnect not implemented. Otherwise we can * use cfg80211_disconnect. */ - if (rdev->ops->disconnect || wdev->current_bss) + if (rdev->ops->disconnect || wdev->connected) cfg80211_disconnect(rdev, wdev->netdev, WLAN_REASON_DEAUTH_LEAVING, true); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 228079d7690a..3b2c956b8d78 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -569,6 +569,7 @@ TRACE_EVENT(rdev_start_ap, __field(bool, privacy) __field(enum nl80211_auth_type, auth_type) __field(int, inactivity_timeout) + __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; @@ -583,16 +584,17 @@ TRACE_EVENT(rdev_start_ap, __entry->inactivity_timeout = settings->inactivity_timeout; memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, settings->ssid, settings->ssid_len); + __entry->link_id = settings->beacon.link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", AP settings - ssid: %s, " CHAN_DEF_PR_FMT ", beacon interval: %d, dtim period: %d, " "hidden ssid: %d, wpa versions: %u, privacy: %s, " - "auth type: %d, inactivity timeout: %d", + "auth type: %d, inactivity timeout: %d, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->ssid, CHAN_DEF_PR_ARG, __entry->beacon_interval, __entry->dtim_period, __entry->hidden_ssid, __entry->wpa_ver, BOOL_TO_STR(__entry->privacy), __entry->auth_type, - __entry->inactivity_timeout) + __entry->inactivity_timeout, __entry->link_id) ); TRACE_EVENT(rdev_change_beacon, @@ -602,6 +604,7 @@ TRACE_EVENT(rdev_change_beacon, TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(int, link_id) __dynamic_array(u8, head, info ? info->head_len : 0) __dynamic_array(u8, tail, info ? info->tail_len : 0) __dynamic_array(u8, beacon_ies, info ? info->beacon_ies_len : 0) @@ -615,6 +618,7 @@ TRACE_EVENT(rdev_change_beacon, WIPHY_ASSIGN; NETDEV_ASSIGN; if (info) { + __entry->link_id = info->link_id; if (info->head) memcpy(__get_dynamic_array(head), info->head, info->head_len); @@ -635,9 +639,30 @@ TRACE_EVENT(rdev_change_beacon, if (info->probe_resp) memcpy(__get_dynamic_array(probe_resp), info->probe_resp, info->probe_resp_len); + } else { + __entry->link_id = -1; } ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) +); + +TRACE_EVENT(rdev_stop_ap, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + unsigned int link_id), + TP_ARGS(wiphy, netdev, link_id), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(unsigned int, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->link_id = link_id; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) ); DECLARE_EVENT_CLASS(wiphy_netdev_evt, @@ -654,11 +679,6 @@ DECLARE_EVENT_CLASS(wiphy_netdev_evt, TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); -DEFINE_EVENT(wiphy_netdev_evt, rdev_stop_ap, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), - TP_ARGS(wiphy, netdev) -); - DEFINE_EVENT(wiphy_netdev_evt, rdev_set_rekey_data, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) @@ -1619,20 +1639,24 @@ TRACE_EVENT(rdev_testmode_dump, TRACE_EVENT(rdev_set_bitrate_mask, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask), - TP_ARGS(wiphy, netdev, peer, mask), + TP_ARGS(wiphy, netdev, link_id, peer, mask), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(unsigned int, link_id) MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; + __entry->link_id = link_id; MAC_ASSIGN(peer, peer); ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer)) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, peer: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + MAC_PR_ARG(peer)) ); TRACE_EVENT(rdev_update_mgmt_frame_registrations, @@ -2040,9 +2064,22 @@ TRACE_EVENT(rdev_set_noack_map, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map) ); -DEFINE_EVENT(wiphy_wdev_evt, rdev_get_channel, - TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), - TP_ARGS(wiphy, wdev) +TRACE_EVENT(rdev_get_channel, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id), + TP_ARGS(wiphy, wdev, link_id), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(unsigned int, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->link_id = link_id; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) ); TRACE_EVENT(rdev_return_chandef, @@ -2296,20 +2333,24 @@ TRACE_EVENT(rdev_set_qos_map, TRACE_EVENT(rdev_set_ap_chanwidth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + unsigned int link_id, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, netdev, chandef), + TP_ARGS(wiphy, netdev, link_id, chandef), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY + __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->link_id = link_id; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, + __entry->link_id) ); TRACE_EVENT(rdev_add_tx_ts, @@ -3022,18 +3063,21 @@ TRACE_EVENT(cfg80211_chandef_dfs_required, TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, - struct cfg80211_chan_def *chandef), - TP_ARGS(netdev, chandef), + struct cfg80211_chan_def *chandef, + unsigned int link_id), + TP_ARGS(netdev, chandef, link_id), TP_STRUCT__entry( NETDEV_ENTRY CHAN_DEF_ENTRY + __field(unsigned int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->link_id = link_id; ), - TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, - NETDEV_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", + NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(cfg80211_ch_switch_started_notify, diff --git a/net/wireless/util.c b/net/wireless/util.c index a60d7d638e72..b7257862e0fe 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include #include @@ -1041,7 +1041,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; dev->ieee80211_ptr->use_4addr = false; - dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); rdev_set_qos_map(rdev, dev, NULL); wdev_unlock(dev->ieee80211_ptr); @@ -1049,7 +1048,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, switch (otype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, dev, true); + cfg80211_stop_ap(rdev, dev, -1, true); break; case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, false); @@ -1073,6 +1072,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, cfg80211_process_rdev_events(rdev); cfg80211_mlme_purge_registrations(dev->ieee80211_ptr); + + memset(&dev->ieee80211_ptr->u, 0, + sizeof(dev->ieee80211_ptr->u)); + memset(&dev->ieee80211_ptr->links, 0, + sizeof(dev->ieee80211_ptr->links)); } err = rdev_change_virtual_intf(rdev, dev, ntype, params); @@ -1930,6 +1934,24 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); +static int cfg80211_wdev_bi(struct wireless_dev *wdev) +{ + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + WARN_ON(wdev->valid_links); + return wdev->links[0].ap.beacon_interval; + case NL80211_IFTYPE_MESH_POINT: + return wdev->u.mesh.beacon_interval; + case NL80211_IFTYPE_ADHOC: + return wdev->u.ibss.beacon_interval; + default: + break; + } + + return 0; +} + static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, u32 *beacon_int_gcd, bool *beacon_int_different) @@ -1940,19 +1962,27 @@ static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, *beacon_int_different = false; list_for_each_entry(wdev, &wiphy->wdev_list, list) { - if (!wdev->beacon_interval) + int wdev_bi; + + /* this feature isn't supported with MLO */ + if (wdev->valid_links) + continue; + + wdev_bi = cfg80211_wdev_bi(wdev); + + if (!wdev_bi) continue; if (!*beacon_int_gcd) { - *beacon_int_gcd = wdev->beacon_interval; + *beacon_int_gcd = wdev_bi; continue; } - if (wdev->beacon_interval == *beacon_int_gcd) + if (wdev_bi == *beacon_int_gcd) continue; *beacon_int_different = true; - *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval); + *beacon_int_gcd = gcd(*beacon_int_gcd, wdev_bi); } if (new_beacon_int && *beacon_int_gcd != new_beacon_int) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index a32065d600a1..a9767bfe7330 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,7 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation */ #include @@ -415,6 +415,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, int err, i; bool rejoin = false; + if (wdev->valid_links) + return -EINVAL; + if (pairwise && !addr) return -EINVAL; @@ -437,7 +440,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { - if (!wdev->current_bss) + if (!wdev->connected) return -ENOLINK; if (!rdev->ops->set_default_mgmt_key) @@ -450,7 +453,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (remove) { err = 0; - if (wdev->current_bss) { + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) { /* * If removing the current TX key, we will need to * join a new IBSS without the privacy bit clear. @@ -501,7 +506,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return -EINVAL; err = 0; - if (wdev->current_bss) + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) err = rdev_add_key(rdev, dev, idx, pairwise, addr, params); else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) @@ -526,7 +533,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if ((params->cipher == WLAN_CIPHER_SUITE_WEP40 || params->cipher == WLAN_CIPHER_SUITE_WEP104) && (tx_key || (!addr && wdev->wext.default_key == -1))) { - if (wdev->current_bss) { + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) { /* * If we are getting a new TX key from not having * had one before we need to join a new IBSS with @@ -549,7 +558,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { - if (wdev->current_bss) + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) err = rdev_set_default_mgmt_key(rdev, dev, idx); if (!err) wdev->wext.default_mgmt_key = idx; @@ -595,6 +606,11 @@ static int cfg80211_wext_siwencode(struct net_device *dev, return -EOPNOTSUPP; wiphy_lock(&rdev->wiphy); + if (wdev->valid_links) { + err = -EOPNOTSUPP; + goto out; + } + idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { idx = wdev->wext.default_key; @@ -613,7 +629,9 @@ static int cfg80211_wext_siwencode(struct net_device *dev, /* No key data - just set the default TX key index */ err = 0; wdev_lock(wdev); - if (wdev->current_bss) + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) err = rdev_set_default_key(rdev, dev, idx, true, true); if (!err) @@ -865,7 +883,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, break; } - ret = rdev_get_channel(rdev, wdev, &chandef); + ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret) break; freq->m = chandef.chan->center_freq; @@ -1270,7 +1288,10 @@ static int cfg80211_wext_siwrate(struct net_device *dev, return -EINVAL; wiphy_lock(&rdev->wiphy); - ret = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + if (dev->ieee80211_ptr->valid_links) + ret = -EOPNOTSUPP; + else + ret = rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); wiphy_unlock(&rdev->wiphy); return ret; @@ -1294,8 +1315,9 @@ static int cfg80211_wext_giwrate(struct net_device *dev, err = 0; wdev_lock(wdev); - if (wdev->current_bss) - memcpy(addr, wdev->current_bss->pub.bssid, ETH_ALEN); + if (!wdev->valid_links && wdev->links[0].client.current_bss) + memcpy(addr, wdev->links[0].client.current_bss->pub.bssid, + ETH_ALEN); else err = -EOPNOTSUPP; wdev_unlock(wdev); @@ -1339,11 +1361,11 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) /* Grab BSSID of current BSS, if any */ wdev_lock(wdev); - if (!wdev->current_bss) { + if (wdev->valid_links || !wdev->links[0].client.current_bss) { wdev_unlock(wdev); return NULL; } - memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); + memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); wdev_unlock(wdev); memset(&sinfo, 0, sizeof(sinfo)); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index cd09a9042261..68f45afc352d 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -3,7 +3,7 @@ * cfg80211 wext compat for managed mode. * * Copyright 2009 Johannes Berg - * Copyright (C) 2009, 2020-2021 Intel Corporation. + * Copyright (C) 2009, 2020-2022 Intel Corporation */ #include @@ -124,9 +124,12 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; + if (wdev->valid_links) + return -EOPNOTSUPP; + wdev_lock(wdev); - if (wdev->current_bss) - chan = wdev->current_bss->pub.channel; + if (wdev->links[0].client.current_bss) + chan = wdev->links[0].client.current_bss->pub.channel; else if (wdev->wext.connect.channel) chan = wdev->wext.connect.channel; wdev_unlock(wdev); @@ -208,15 +211,19 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; + if (wdev->valid_links) + return -EINVAL; + data->flags = 0; wdev_lock(wdev); - if (wdev->current_bss) { + if (wdev->links[0].client.current_bss) { const struct element *ssid_elem; rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, - WLAN_EID_SSID); + ssid_elem = ieee80211_bss_get_elem( + &wdev->links[0].client.current_bss->pub, + WLAN_EID_SSID); if (ssid_elem) { data->flags = 1; data->length = ssid_elem->datalen; @@ -300,8 +307,14 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; wdev_lock(wdev); - if (wdev->current_bss) - memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); + if (wdev->valid_links) { + wdev_unlock(wdev); + return -EOPNOTSUPP; + } + if (wdev->links[0].client.current_bss) + memcpy(ap_addr->sa_data, + wdev->links[0].client.current_bss->pub.bssid, + ETH_ALEN); else eth_zero_addr(ap_addr->sa_data); wdev_unlock(wdev); -- cgit v1.2.3 From 0f48b8b88aa9ed7b65d7cb55dbc57ec914ddada1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 31 May 2022 14:03:38 +0200 Subject: wifi: ieee80211: add definitions for multi-link element Add the definitions necessary to build and parse some of the multi-link element, the per-STA profile isn't fully included. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 222 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e15771965916..870f659087d6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4379,4 +4379,226 @@ enum ieee80211_range_params_max_total_ltf { /* multi-link device */ #define IEEE80211_MLD_MAX_NUM_LINKS 15 +#define IEEE80211_ML_CONTROL_TYPE 0x0007 +#define IEEE80211_ML_CONTROL_TYPE_BASIC 0 +#define IEEE80211_ML_CONTROL_TYPE_PREQ 1 +#define IEEE80211_ML_CONTROL_TYPE_RECONF 2 +#define IEEE80211_ML_CONTROL_TYPE_TDLS 3 +#define IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS 4 +#define IEEE80211_ML_CONTROL_PRESENCE_MASK 0xfff0 + +struct ieee80211_multi_link_elem { + __le16 control; + u8 variable[]; +} __packed; + +#define IEEE80211_MLC_BASIC_PRES_LINK_ID 0x0010 +#define IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT 0x0020 +#define IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY 0x0040 +#define IEEE80211_MLC_BASIC_PRES_EML_CAPA 0x0080 +#define IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP 0x0100 +#define IEEE80211_MLC_BASIC_PRES_MLD_ID 0x0200 + +#define IEEE80211_MED_SYNC_DELAY_DURATION 0x00ff +#define IEEE80211_MED_SYNC_DELAY_SYNC_OFDM_ED_THRESH 0x0f00 +#define IEEE80211_MED_SYNC_DELAY_SYNC_MAX_NUM_TXOPS 0xf000 + +#define IEEE80211_EML_CAP_EMLSR_SUPP 0x0001 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY 0x000e +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_0US 0 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_32US 1 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_64US 2 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_128US 3 +#define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US 4 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY 0x0070 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_0US 0 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_16US 1 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_32US 2 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_64US 3 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_128US 4 +#define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US 5 +#define IEEE80211_EML_CAP_EMLMR_SUPPORT 0x0080 +#define IEEE80211_EML_CAP_EMLMR_DELAY 0x0700 +#define IEEE80211_EML_CAP_EMLMR_DELAY_0US 0 +#define IEEE80211_EML_CAP_EMLMR_DELAY_32US 1 +#define IEEE80211_EML_CAP_EMLMR_DELAY_64US 2 +#define IEEE80211_EML_CAP_EMLMR_DELAY_128US 3 +#define IEEE80211_EML_CAP_EMLMR_DELAY_256US 4 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT 0x7800 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_0 0 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128US 1 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_256US 2 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_512US 3 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_1TU 4 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_2TU 5 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_4TU 6 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_8TU 7 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_16TU 8 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_32TU 9 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_64TU 10 +#define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU 11 + +#define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f +#define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 +#define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 +#define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 + +struct ieee80211_mle_basic_common_info { + u8 len; + u8 mld_mac_addr[ETH_ALEN]; + u8 variable[]; +} __packed; + +#define IEEE80211_MLC_PREQ_PRES_MLD_ID 0x0010 + +struct ieee80211_mle_preq_common_info { + u8 len; + u8 variable[]; +} __packed; + +#define IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR 0x0010 + +/* no fixed fields in RECONF */ + +struct ieee80211_mle_tdls_common_info { + u8 len; + u8 ap_mld_mac_addr[ETH_ALEN]; +} __packed; + +#define IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR 0x0010 + +/* no fixed fields in PRIO_ACCESS */ + +/** + * ieee80211_mle_common_size - check multi-link element common size + * @data: multi-link element, must already be checked for size using + * ieee80211_mle_size_ok() + */ +static inline u8 ieee80211_mle_common_size(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + u8 common = 0; + + switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { + case IEEE80211_ML_CONTROL_TYPE_BASIC: + common += sizeof(struct ieee80211_mle_basic_common_info); + break; + case IEEE80211_ML_CONTROL_TYPE_PREQ: + common += sizeof(struct ieee80211_mle_preq_common_info); + break; + case IEEE80211_ML_CONTROL_TYPE_RECONF: + if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) + common += ETH_ALEN; + return common; + case IEEE80211_ML_CONTROL_TYPE_TDLS: + common += sizeof(struct ieee80211_mle_tdls_common_info); + break; + case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: + if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) + common += ETH_ALEN; + return common; + default: + WARN_ON(1); + return 0; + } + + return common + mle->variable[0]; +} + +/** + * ieee80211_mle_size_ok - validate multi-link element size + * @data: pointer to the element data + * @len: length of the containing element + */ +static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u8 fixed = sizeof(*mle); + u8 common = 0; + bool check_common_len = false; + u16 control; + + if (len < fixed) + return false; + + control = le16_to_cpu(mle->control); + + switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { + case IEEE80211_ML_CONTROL_TYPE_BASIC: + common += sizeof(struct ieee80211_mle_basic_common_info); + check_common_len = true; + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) + common += 1; + break; + case IEEE80211_ML_CONTROL_TYPE_PREQ: + common += sizeof(struct ieee80211_mle_preq_common_info); + if (control & IEEE80211_MLC_PREQ_PRES_MLD_ID) + common += 1; + check_common_len = true; + break; + case IEEE80211_ML_CONTROL_TYPE_RECONF: + if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) + common += ETH_ALEN; + break; + case IEEE80211_ML_CONTROL_TYPE_TDLS: + common += sizeof(struct ieee80211_mle_tdls_common_info); + check_common_len = true; + break; + case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: + if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) + common += ETH_ALEN; + break; + default: + /* we don't know this type */ + return true; + } + + if (len < fixed + common) + return false; + + if (!check_common_len) + return true; + + /* if present, common length is the first octet there */ + return mle->variable[0] >= common; +} + +enum ieee80211_mle_subelems { + IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0, +}; + +#define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f +#define IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE 0x0010 +#define IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 +#define IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT 0x0040 +#define IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT 0x0080 +#define IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT 0x0100 +#define IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT 0x0200 +#define IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE 0x0400 +#define IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT 0x0800 + +struct ieee80211_mle_per_sta_profile { + __le16 control; + u8 sta_info_len; + u8 variable[]; +} __packed; + +#define for_each_mle_subelement(_elem, _data, _len) \ + if (ieee80211_mle_size_ok(_data, _len)) \ + for_each_element(_elem, \ + _data + ieee80211_mle_common_size(_data),\ + _len - ieee80211_mle_common_size(_data)) + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 965b57b469a589d64d81b1688b38dcb537011bb0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 15 Jun 2022 09:20:12 -0700 Subject: net: Introduce a new proto_ops ->read_skb() Currently both splice() and sockmap use ->read_sock() to read skb from receive queue, but for sockmap we only read one entire skb at a time, so ->read_sock() is too conservative to use. Introduce a new proto_ops ->read_skb() which supports this sematic, with this we can finally pass the ownership of skb to recv actors. For non-TCP protocols, all ->read_sock() can be simply converted to ->read_skb(). Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20220615162014.89193-3-xiyou.wangcong@gmail.com --- include/linux/net.h | 4 ++++ include/net/tcp.h | 3 +-- include/net/udp.h | 3 +-- net/core/skmsg.c | 20 +++++--------------- net/ipv4/af_inet.c | 3 ++- net/ipv4/tcp.c | 9 +++------ net/ipv4/udp.c | 10 ++++------ net/ipv6/af_inet6.c | 3 ++- net/unix/af_unix.c | 23 +++++++++-------------- 9 files changed, 31 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 12093f4db50c..a03485e8cbb2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -152,6 +152,8 @@ struct module; struct sk_buff; typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, unsigned int, size_t); +typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); + struct proto_ops { int family; @@ -214,6 +216,8 @@ struct proto_ops { */ int (*read_sock)(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); + /* This is different from read_sock(), it reads an entire skb at a time. */ + int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor); int (*sendpage_locked)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, diff --git a/include/net/tcp.h b/include/net/tcp.h index 7547d90fbb57..8e48dc56837b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -672,8 +672,7 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); void tcp_initialize_rcv_mss(struct sock *sk); diff --git a/include/net/udp.h b/include/net/udp.h index b60eea2e3fae..987f7fc7c0aa 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -306,8 +306,7 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); -int udp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 7e03f96e441b..f7f63b7d990c 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1160,21 +1160,17 @@ static void sk_psock_done_strp(struct sk_psock *psock) } #endif /* CONFIG_BPF_STREAM_PARSER */ -static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, - unsigned int offset, size_t orig_len) +static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = (struct sock *)desc->arg.data; struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; - int len = orig_len; + int len = skb->len; /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) { - desc->error = -ENOMEM; + if (!skb) return 0; - } rcu_read_lock(); psock = sk_psock(sk); @@ -1204,16 +1200,10 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { struct socket *sock = sk->sk_socket; - read_descriptor_t desc; - if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + if (unlikely(!sock || !sock->ops || !sock->ops->read_skb)) return; - - desc.arg.data = sk; - desc.error = 0; - desc.count = 1; - - sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv); + sock->ops->read_skb(sk, sk_psock_verdict_recv); } void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index da81f56fdd1c..7abd652a558f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1040,6 +1040,7 @@ const struct proto_ops inet_stream_ops = { .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .read_skb = tcp_read_skb, .sendmsg_locked = tcp_sendmsg_locked, .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, @@ -1067,7 +1068,7 @@ const struct proto_ops inet_dgram_ops = { .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, - .read_sock = udp_read_sock, + .read_skb = udp_read_skb, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 124f384f8695..9d2fd3ced21b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1734,8 +1734,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, } EXPORT_SYMBOL(tcp_read_sock); -int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct tcp_sock *tp = tcp_sk(sk); u32 seq = tp->copied_seq; @@ -1750,7 +1749,7 @@ int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, int used; __skb_unlink(skb, &sk->sk_receive_queue); - used = recv_actor(desc, skb, 0, skb->len); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -1765,9 +1764,7 @@ int tcp_read_skb(struct sock *sk, read_descriptor_t *desc, break; } consume_skb(skb); - if (!desc->count) - break; - WRITE_ONCE(tp->copied_seq, seq); + break; } WRITE_ONCE(tp->copied_seq, seq); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6172b4750a88..c660b0bc4d14 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1797,8 +1797,7 @@ busy_check: } EXPORT_SYMBOL(__skb_recv_udp); -int udp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { int copied = 0; @@ -1820,7 +1819,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, continue; } - used = recv_actor(desc, skb, 0, skb->len); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -1831,13 +1830,12 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, } kfree_skb(skb); - if (!desc->count) - break; + break; } return copied; } -EXPORT_SYMBOL(udp_read_sock); +EXPORT_SYMBOL(udp_read_skb); /* * This should be easy, if there is something there we diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 658823e91eca..0ee0770e79aa 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -702,6 +702,7 @@ const struct proto_ops inet6_stream_ops = { .sendpage_locked = tcp_sendpage_locked, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .read_skb = tcp_read_skb, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, @@ -727,7 +728,7 @@ const struct proto_ops inet6_dgram_ops = { .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ - .read_sock = udp_read_sock, + .read_skb = udp_read_skb, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3453e0053f76..1bed3739768c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -741,10 +741,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); @@ -798,7 +796,7 @@ static const struct proto_ops unix_stream_ops = { .shutdown = unix_shutdown, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, - .read_sock = unix_stream_read_sock, + .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, @@ -823,7 +821,7 @@ static const struct proto_ops unix_dgram_ops = { .listen = sock_no_listen, .shutdown = unix_shutdown, .sendmsg = unix_dgram_sendmsg, - .read_sock = unix_read_sock, + .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, @@ -2487,8 +2485,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si return __unix_dgram_recvmsg(sk, msg, size, flags); } -static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { int copied = 0; @@ -2503,7 +2500,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, if (!skb) return err; - used = recv_actor(desc, skb, 0, skb->len); + used = recv_actor(sk, skb); if (used <= 0) { if (!copied) copied = used; @@ -2514,8 +2511,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, } kfree_skb(skb); - if (!desc->count) - break; + break; } return copied; @@ -2650,13 +2646,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, } #endif -static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { if (unlikely(sk->sk_state != TCP_ESTABLISHED)) return -ENOTCONN; - return unix_read_sock(sk, desc, recv_actor); + return unix_read_skb(sk, recv_actor); } static int unix_stream_read_generic(struct unix_stream_read_state *state, -- cgit v1.2.3 From 1ade23711971b0eececf0d7fedc29d3c1d2fce01 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Jun 2022 02:53:42 +0300 Subject: bpf: Inline calls to bpf_loop when callback is known Calls to `bpf_loop` are replaced with direct loops to avoid indirection. E.g. the following: bpf_loop(10, foo, NULL, 0); Is replaced by equivalent of the following: for (int i = 0; i < 10; ++i) foo(i, NULL); This transformation could be applied when: - callback is known and does not change during program execution; - flags passed to `bpf_loop` are always zero. Inlining logic works as follows: - During execution simulation function `update_loop_inline_state` tracks the following information for each `bpf_loop` call instruction: - is callback known and constant? - are flags constant and zero? - Function `optimize_bpf_loop` increases stack depth for functions where `bpf_loop` calls can be inlined and invokes `inline_bpf_loop` to apply the inlining. The additional stack space is used to spill registers R6, R7 and R8. These registers are used as loop counter, loop maximal bound and callback context parameter; Measurements using `benchs/run_bench_bpf_loop.sh` inside QEMU / KVM on i7-4710HQ CPU show a drop in latency from 14 ns/op to 2 ns/op. Signed-off-by: Eduard Zingerman Acked-by: Song Liu Link: https://lore.kernel.org/r/20220620235344.569325-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 + include/linux/bpf_verifier.h | 12 +++ kernel/bpf/bpf_iter.c | 9 ++- kernel/bpf/verifier.c | 180 +++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 195 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0edd7d2c0064..d05e1495a06e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1286,6 +1286,9 @@ struct bpf_array { #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 +/* Maximum number of loops for bpf_loop */ +#define BPF_MAX_LOOPS BIT(23) + #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ BPF_F_WRONLY | \ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3930c963fa67..81b19669efba 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -344,6 +344,14 @@ struct bpf_verifier_state_list { int miss_cnt, hit_cnt; }; +struct bpf_loop_inline_state { + int initialized:1; /* set to true upon first entry */ + int fit_for_inline:1; /* true if callback function is the same + * at each call and flags are always zero + */ + u32 callback_subprogno; /* valid when fit_for_inline is true */ +}; + /* Possible states for alu_state member. */ #define BPF_ALU_SANITIZE_SRC (1U << 0) #define BPF_ALU_SANITIZE_DST (1U << 1) @@ -373,6 +381,10 @@ struct bpf_insn_aux_data { u32 mem_size; /* mem_size for non-struct typed var */ }; } btf_var; + /* if instruction is a call to bpf_loop this field tracks + * the state of the relevant registers to make decision about inlining + */ + struct bpf_loop_inline_state loop_inline_state; }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index d5d96ceca105..7e8fd49406f6 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -723,9 +723,6 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = { .arg4_type = ARG_ANYTHING, }; -/* maximum number of loops */ -#define MAX_LOOPS BIT(23) - BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, u64, flags) { @@ -733,9 +730,13 @@ BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, u64 ret; u32 i; + /* Note: these safety checks are also verified when bpf_loop + * is inlined, be careful to modify this code in sync. See + * function verifier.c:inline_bpf_loop. + */ if (flags) return -EINVAL; - if (nr_loops > MAX_LOOPS) + if (nr_loops > BPF_MAX_LOOPS) return -E2BIG; for (i = 0; i < nr_loops; i++) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2859901ffbe3..bf72dc511df6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7124,6 +7124,41 @@ static int check_get_func_ip(struct bpf_verifier_env *env) return -ENOTSUPP; } +static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +{ + return &env->insn_aux_data[env->insn_idx]; +} + +static bool loop_flag_is_zero(struct bpf_verifier_env *env) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[BPF_REG_4]; + bool reg_is_null = register_is_null(reg); + + if (reg_is_null) + mark_chain_precision(env, BPF_REG_4); + + return reg_is_null; +} + +static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno) +{ + struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state; + + if (!state->initialized) { + state->initialized = 1; + state->fit_for_inline = loop_flag_is_zero(env); + state->callback_subprogno = subprogno; + return; + } + + if (!state->fit_for_inline) + return; + + state->fit_for_inline = (loop_flag_is_zero(env) && + state->callback_subprogno == subprogno); +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -7276,6 +7311,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = check_bpf_snprintf_call(env, regs); break; case BPF_FUNC_loop: + update_loop_inline_state(env, meta.subprogno); err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_loop_callback_state); break; @@ -7682,11 +7718,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, return true; } -static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) -{ - return &env->insn_aux_data[env->insn_idx]; -} - enum { REASON_BOUNDS = -1, REASON_TYPE = -2, @@ -14315,6 +14346,142 @@ patch_call_imm: return 0; } +static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env, + int position, + s32 stack_base, + u32 callback_subprogno, + u32 *cnt) +{ + s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; + s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; + s32 r8_offset = stack_base + 2 * BPF_REG_SIZE; + int reg_loop_max = BPF_REG_6; + int reg_loop_cnt = BPF_REG_7; + int reg_loop_ctx = BPF_REG_8; + + struct bpf_prog *new_prog; + u32 callback_start; + u32 call_insn_offset; + s32 callback_offset; + + /* This represents an inlined version of bpf_iter.c:bpf_loop, + * be careful to modify this code in sync. + */ + struct bpf_insn insn_buf[] = { + /* Return error and jump to the end of the patch if + * expected number of iterations is too big. + */ + BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2), + BPF_MOV32_IMM(BPF_REG_0, -E2BIG), + BPF_JMP_IMM(BPF_JA, 0, 0, 16), + /* spill R6, R7, R8 to use these as loop vars */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset), + /* initialize loop vars */ + BPF_MOV64_REG(reg_loop_max, BPF_REG_1), + BPF_MOV32_IMM(reg_loop_cnt, 0), + BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3), + /* loop header, + * if reg_loop_cnt >= reg_loop_max skip the loop body + */ + BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5), + /* callback call, + * correct callback offset would be set after patching + */ + BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt), + BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx), + BPF_CALL_REL(0), + /* increment loop counter */ + BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1), + /* jump to loop header if callback returned 0 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6), + /* return value of bpf_loop, + * set R0 to the number of iterations + */ + BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt), + /* restore original values of R6, R7, R8 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset), + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset), + }; + + *cnt = ARRAY_SIZE(insn_buf); + new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt); + if (!new_prog) + return new_prog; + + /* callback start is known only after patching */ + callback_start = env->subprog_info[callback_subprogno].start; + /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */ + call_insn_offset = position + 12; + callback_offset = callback_start - call_insn_offset - 1; + env->prog->insnsi[call_insn_offset].imm = callback_offset; + + return new_prog; +} + +static bool is_bpf_loop_call(struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == 0 && + insn->imm == BPF_FUNC_loop; +} + +/* For all sub-programs in the program (including main) check + * insn_aux_data to see if there are bpf_loop calls that require + * inlining. If such calls are found the calls are replaced with a + * sequence of instructions produced by `inline_bpf_loop` function and + * subprog stack_depth is increased by the size of 3 registers. + * This stack space is used to spill values of the R6, R7, R8. These + * registers are used to store the loop bound, counter and context + * variables. + */ +static int optimize_bpf_loop(struct bpf_verifier_env *env) +{ + struct bpf_subprog_info *subprogs = env->subprog_info; + int i, cur_subprog = 0, cnt, delta = 0; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + u16 stack_depth = subprogs[cur_subprog].stack_depth; + u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; + u16 stack_depth_extra = 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + struct bpf_loop_inline_state *inline_state = + &env->insn_aux_data[i + delta].loop_inline_state; + + if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) { + struct bpf_prog *new_prog; + + stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup; + new_prog = inline_bpf_loop(env, + i + delta, + -(stack_depth + stack_depth_extra), + inline_state->callback_subprogno, + &cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = new_prog; + insn = new_prog->insnsi + i + delta; + } + + if (subprogs[cur_subprog + 1].start == i + delta + 1) { + subprogs[cur_subprog].stack_depth += stack_depth_extra; + cur_subprog++; + stack_depth = subprogs[cur_subprog].stack_depth; + stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; + stack_depth_extra = 0; + } + } + + env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; + + return 0; +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -15052,6 +15219,9 @@ skip_full_check: ret = check_max_stack_depth(env); /* instruction rewrites happen after this point */ + if (ret == 0) + ret = optimize_bpf_loop(env); + if (is_priv) { if (ret == 0) opt_hard_wire_dead_code_branches(env); -- cgit v1.2.3 From f9aefd6b2aa38b9787d2705f0f1161dfd23cdb8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Jun 2022 02:30:17 -0700 Subject: net: warn if mac header was not set Make sure skb_mac_header(), skb_mac_offset() and skb_mac_header_len() uses are not fooled if the mac header has not been set. These checks are enabled if CONFIG_DEBUG_NET=y This commit will likely expose existing bugs in linux networking stacks. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20220620093017.3366713-1-eric.dumazet@gmail.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 82edf0359ab3..cd4a8268894a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2763,8 +2763,14 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset) skb->network_header += offset; } +static inline int skb_mac_header_was_set(const struct sk_buff *skb) +{ + return skb->mac_header != (typeof(skb->mac_header))~0U; +} + static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->head + skb->mac_header; } @@ -2775,14 +2781,10 @@ static inline int skb_mac_offset(const struct sk_buff *skb) static inline u32 skb_mac_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->network_header - skb->mac_header; } -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != (typeof(skb->mac_header))~0U; -} - static inline void skb_unset_mac_header(struct sk_buff *skb) { skb->mac_header = (typeof(skb->mac_header))~0U; -- cgit v1.2.3 From 95acd8817e66d031d2e6ee7def3f1e1874819317 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Fri, 17 Jun 2022 12:57:34 +0200 Subject: bpf, x64: Add predicate for bpf2bpf with tailcalls support in JIT The BPF core/verifier is hard-coded to permit mixing bpf2bpf and tail calls for only x86-64. Change the logic to instead rely on a new weak function 'bool bpf_jit_supports_subprog_tailcalls(void)', which a capable JIT backend can override. Update the x86-64 eBPF JIT to reflect this. Signed-off-by: Tony Ambardar [jakub: drop MIPS bits and tweak patch subject] Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220617105735.733938-2-jakub@cloudflare.com --- arch/x86/net/bpf_jit_comp.c | 6 ++++++ include/linux/filter.h | 1 + kernel/bpf/core.c | 6 ++++++ kernel/bpf/verifier.c | 3 ++- 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index f298b18a9a3d..2c51ca9f7cec 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2491,3 +2491,9 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len) return ERR_PTR(-EINVAL); return dst; } + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} diff --git a/include/linux/filter.h b/include/linux/filter.h index d0cbb31b1b4d..4c1a8b247545 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -914,6 +914,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_helper_changes_pkt_data(void *func); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b5ffebcce6cc..f023cb399e3f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2729,6 +2729,12 @@ bool __weak bpf_jit_needs_zext(void) return false; } +/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool __weak bpf_jit_supports_subprog_tailcalls(void) +{ + return false; +} + bool __weak bpf_jit_supports_kfunc_call(void) { return false; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf72dc511df6..a20d7736a5b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6154,7 +6154,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env) { - return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64); + return env->prog->jit_requested && + bpf_jit_supports_subprog_tailcalls(); } static int check_map_func_compatibility(struct bpf_verifier_env *env, -- cgit v1.2.3 From ebc3197963fc42841b183d0280bd3f9f85f13c30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:32 +0000 Subject: ipmr: add rcu protection over (struct vif_device)->dev We will soon use RCU instead of rwlock in ipmr & ip6mr This preliminary patch adds proper rcu verbs to read/write (struct vif_device)->dev Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 11 +++++--- net/ipv4/ipmr.c | 65 ++++++++++++++++++++++++++------------------- net/ipv4/ipmr_base.c | 49 +++++++++++++++++++++------------- net/ipv6/ip6mr.c | 63 ++++++++++++++++++++++++------------------- 4 files changed, 111 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index e05ee9f001ff..10d1e4fb4e9f 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -26,7 +26,7 @@ * @remote: Remote address for tunnels */ struct vif_device { - struct net_device *dev; + struct net_device __rcu *dev; netdevice_tracker dev_tracker; unsigned long bytes_in, bytes_out; unsigned long pkt_in, pkt_out; @@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, struct netlink_ext_ack *extack) { @@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, .family = family, .extack = extack, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, unsigned int *ipmr_seq) { @@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net, .info = { .family = family, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net, #define MAXVIFS 32 #endif -#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) +/* Note: This helper is deprecated. */ +#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev)) /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8324e541d193..10371a9e78fc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -79,6 +79,12 @@ struct ipmr_result { static DEFINE_RWLOCK(mrt_lock); +static struct net_device *vif_dev_read(const struct vif_device *vif) +{ + return rcu_dereference_check(vif->dev, + lockdep_is_held(&mrt_lock)); +} + /* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ @@ -586,7 +592,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, read_lock(&mrt_lock); if (mrt->mroute_reg_vif_num >= 0) - reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; + reg_dev = vif_dev_read(&mrt->vif_table[mrt->mroute_reg_vif_num]); read_unlock(&mrt_lock); if (!reg_dev) @@ -614,10 +620,11 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) static int call_ipmr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, vifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, - vif, vif_index, tb_id, + vif, vif_dev, vif_index, tb_id, &net->ipv4.ipmr_seq); } @@ -649,18 +656,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; - if (VIF_EXISTS(mrt, vifi)) - call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, - mrt->id); + dev = rtnl_dereference(v->dev); + if (!dev) + return -EADDRNOTAVAIL; write_lock_bh(&mrt_lock); - dev = v->dev; - v->dev = NULL; - - if (!dev) { - write_unlock_bh(&mrt_lock); - return -EADDRNOTAVAIL; - } + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, + vifi, mrt->id); + RCU_INIT_POINTER(v->dev, NULL); if (vifi == mrt->mroute_reg_vif_num) mrt->mroute_reg_vif_num = -1; @@ -890,14 +893,15 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - v->dev = dev; + rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); - call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, + vifi, mrt->id); return 0; } @@ -1726,7 +1730,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v ipmr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) + if (rcu_access_pointer(v->dev) == dev) vif_delete(mrt, ct, 1, NULL); } } @@ -1811,19 +1815,21 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; + struct net_device *vif_dev; struct net_device *dev; struct rtable *rt; struct flowi4 fl4; int encap = 0; - if (!vif->dev) + vif_dev = vif_dev_read(vif); + if (!vif_dev) goto out_free; if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; - vif->dev->stats.tx_bytes += skb->len; - vif->dev->stats.tx_packets++; + vif_dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } @@ -1881,8 +1887,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (vif->flags & VIFF_TUNNEL) { ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - vif->dev->stats.tx_packets++; - vif->dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; + vif_dev->stats.tx_bytes += skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1911,7 +1917,7 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) int ct; for (ct = mrt->maxvif-1; ct >= 0; ct--) { - if (mrt->vif_table[ct].dev == dev) + if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; @@ -1944,7 +1950,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } /* Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != dev) { + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -2744,18 +2750,21 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) { + struct net_device *vif_dev; struct nlattr *vif_nest; struct vif_device *vif; + vif = &mrt->vif_table[vifid]; + vif_dev = vif_dev_read(vif); /* if the VIF doesn't exist just continue */ - if (!VIF_EXISTS(mrt, vifid)) + if (!vif_dev) return true; - vif = &mrt->vif_table[vifid]; vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); if (!vif_nest) return false; - if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || + + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) || nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, @@ -2919,9 +2928,11 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); } else { const struct vif_device *vif = v; - const char *name = vif->dev ? - vif->dev->name : "none"; + const struct net_device *vif_dev; + const char *name; + vif_dev = vif_dev_read(vif); + name = vif_dev ? vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", vif - mrt->vif_table, diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index aa8738a91210..59f62b938472 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v, unsigned short flags, unsigned short get_iflink_mask) { - v->dev = NULL; + RCU_INIT_POINTER(v->dev, NULL); v->bytes_in = 0; v->bytes_out = 0; v->pkt_in = 0; @@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next); int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mr_mfc *c, struct rtmsg *rtm) { + struct net_device *vif_dev; struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; @@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, return -ENOENT; } - if (VIF_EXISTS(mrt, c->mfc_parent) && - nla_put_u32(skb, RTA_IIF, - mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) + rcu_read_lock(); + vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev); + if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) { + rcu_read_unlock(); return -EMSGSIZE; + } + rcu_read_unlock(); if (c->mfc_flags & MFC_OFFLOAD) rtm->rtm_flags |= RTNH_F_OFFLOAD; @@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (!mp_attr) return -EMSGSIZE; + rcu_read_lock(); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - struct vif_device *vif; + struct vif_device *vif = &mrt->vif_table[ct]; + + vif_dev = rcu_dereference(vif->dev); + if (vif_dev && c->mfc_un.res.ttls[ct] < 255) { nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); if (!nhp) { + rcu_read_unlock(); nla_nest_cancel(skb, mp_attr); return -EMSGSIZE; } nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - vif = &mrt->vif_table[ct]; - nhp->rtnh_ifindex = vif->dev->ifindex; + nhp->rtnh_ifindex = vif_dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } + rcu_read_unlock(); nla_nest_end(skb, mp_attr); @@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt, int ct; for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - const struct vif_device *vif; - - vif = &mrt->vif_table[ct]; - if (vif->dev == dev) - return true; - } + const struct net_device *vif_dev; + const struct vif_device *vif; + + vif = &mrt->vif_table[ct]; + vif_dev = rcu_access_pointer(vif->dev); + if (vif_dev && c->mfc_un.res.ttls[ct] < 255 && + vif_dev == dev) + return true; } return false; } @@ -402,18 +411,22 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) { struct vif_device *v = &mrt->vif_table[0]; + struct net_device *vif_dev; struct mr_mfc *mfc; int vifi; /* Notifiy on table VIF entries */ read_lock(mrt_lock); for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { - if (!v->dev) + vif_dev = rcu_dereference_check(v->dev, + lockdep_is_held(mrt_lock)); + if (!vif_dev) continue; err = mr_call_vif_notifier(nb, family, - FIB_EVENT_VIF_ADD, - v, vifi, mrt->id, extack); + FIB_EVENT_VIF_ADD, v, + vif_dev, vifi, + mrt->id, extack); if (err) break; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index aa66c032ba97..44cb3d88bbd6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -64,6 +64,12 @@ struct ip6mr_result { static DEFINE_RWLOCK(mrt_lock); +static struct net_device *vif_dev_read(const struct vif_device *vif) +{ + return rcu_dereference_check(vif->dev, + lockdep_is_held(&mrt_lock)); +} + /* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ @@ -430,7 +436,11 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); } else { const struct vif_device *vif = v; - const char *name = vif->dev ? vif->dev->name : "none"; + const struct net_device *vif_dev; + const char *name; + + vif_dev = vif_dev_read(vif); + name = vif_dev ? vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", @@ -553,7 +563,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = mrt->vif_table[reg_vif_num].dev; + reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); read_unlock(&mrt_lock); if (!reg_dev) @@ -668,10 +678,11 @@ failure: static int call_ip6mr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, mifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, - vif, vif_index, tb_id, + vif, vif_dev, vif_index, tb_id, &net->ipv6.ipmr_seq); } @@ -696,19 +707,15 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; - if (VIF_EXISTS(mrt, vifi)) - call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_VIF_DEL, v, vifi, - mrt->id); + dev = rtnl_dereference(v->dev); + if (!dev) + return -EADDRNOTAVAIL; + call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_VIF_DEL, v, dev, + vifi, mrt->id); write_lock_bh(&mrt_lock); - dev = v->dev; - v->dev = NULL; - - if (!dev) { - write_unlock_bh(&mrt_lock); - return -EADDRNOTAVAIL; - } + RCU_INIT_POINTER(v->dev, NULL); #ifdef CONFIG_IPV6_PIMSM_V2 if (vifi == mrt->mroute_reg_vif_num) @@ -911,7 +918,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - v->dev = dev; + rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) @@ -921,7 +928,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, mrt->maxvif = vifi + 1; write_unlock_bh(&mrt_lock); call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, - v, vifi, mrt->id); + v, dev, vifi, mrt->id); return 0; } @@ -1241,7 +1248,7 @@ static int ip6mr_device_event(struct notifier_block *this, ip6mr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) + if (rcu_access_pointer(v->dev) == dev) mif6_delete(mrt, ct, 1, NULL); } } @@ -2019,21 +2026,22 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct static int ip6mr_forward2(struct net *net, struct mr_table *mrt, struct sk_buff *skb, int vifi) { - struct ipv6hdr *ipv6h; struct vif_device *vif = &mrt->vif_table[vifi]; - struct net_device *dev; + struct net_device *vif_dev; + struct ipv6hdr *ipv6h; struct dst_entry *dst; struct flowi6 fl6; - if (!vif->dev) + vif_dev = vif_dev_read(vif); + if (!vif_dev) goto out_free; #ifdef CONFIG_IPV6_PIMSM_V2 if (vif->flags & MIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; - vif->dev->stats.tx_bytes += skb->len; - vif->dev->stats.tx_packets++; + vif_dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); goto out_free; } @@ -2066,14 +2074,13 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - dev = vif->dev; - skb->dev = dev; + skb->dev = vif_dev; vif->pkt_out++; vif->bytes_out += skb->len; /* We are about to write */ /* XXX: extension headers? */ - if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) + if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) goto out_free; ipv6h = ipv6_hdr(skb); @@ -2082,7 +2089,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, dev, + net, NULL, skb, skb->dev, vif_dev, ip6mr_forward2_finish); out_free: @@ -2095,7 +2102,7 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) int ct; for (ct = mrt->maxvif - 1; ct >= 0; ct--) { - if (mrt->vif_table[ct].dev == dev) + if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; @@ -2133,7 +2140,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != dev) { + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && -- cgit v1.2.3 From 194366b28b8306b7a24596c57c09635ab2891252 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:46 +0000 Subject: ipmr: adopt rcu_read_lock() in mr_dump() We no longer need to acquire mrt_lock() in mr_dump, using rcu_read_lock() is enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 4 ++-- net/ipv4/ipmr.c | 2 +- net/ipv4/ipmr_base.c | 8 +++----- net/ipv6/ip6mr.c | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 10d1e4fb4e9f..9dd4bf157255 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -308,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -363,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { return -EINVAL; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 69ccd3d7c655..38963b8de7af 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3027,7 +3027,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, - ipmr_mr_table_iter, &mrt_lock, extack); + ipmr_mr_table_iter, extack); } static const struct fib_notifier_ops ipmr_notifier_ops_template = { diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 59f62b938472..271dc03fc6db 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -399,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack) { struct mr_table *mrt; @@ -416,10 +415,9 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int vifi; /* Notifiy on table VIF entries */ - read_lock(mrt_lock); + rcu_read_lock(); for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { - vif_dev = rcu_dereference_check(v->dev, - lockdep_is_held(mrt_lock)); + vif_dev = rcu_dereference(v->dev); if (!vif_dev) continue; @@ -430,7 +428,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, if (err) break; } - read_unlock(mrt_lock); + rcu_read_unlock(); if (err) return err; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 08ac177fe30c..f0a9bceb8e3c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1267,7 +1267,7 @@ static int ip6mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, - ip6mr_mr_table_iter, &mrt_lock, extack); + ip6mr_mr_table_iter, extack); } static struct notifier_block ip6_mr_notifier = { -- cgit v1.2.3 From ede57d58e6f38d5bc66137368e4a1e68a157af6e Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Wed, 22 Jun 2022 18:09:03 +0200 Subject: net: helper function skb_len_add Move the len fields manipulation in the skbs to a helper function. There is a comment specifically requesting this and there are several other areas in the code displaying the same pattern which can be refactored. This improves code readability. Signed-off-by: Richard Gobert Link: https://lore.kernel.org/r/20220622160853.GA6478@debian Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++++ include/net/sock.h | 4 +--- net/core/skbuff.c | 13 +++---------- net/ipv4/esp4.c | 4 +--- net/ipv4/ip_output.c | 8 ++------ 5 files changed, 19 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cd4a8268894a..f6a27ab19202 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2351,6 +2351,18 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +/** + * skb_len_add - adds a number to len fields of skb + * @skb: buffer to add len to + * @delta: number of bytes to add + */ +static inline void skb_len_add(struct sk_buff *skb, int delta) +{ + skb->len += delta; + skb->data_len += delta; + skb->truesize += delta; +} + /** * __skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised diff --git a/include/net/sock.h b/include/net/sock.h index 5bed1ea7a722..40bbd0e8925b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2219,9 +2219,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro if (err) return err; - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; + skb_len_add(skb, copy); sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 00bf35ee8205..c62e42d0c531 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3195,9 +3195,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) } } - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; + skb_len_add(to, len + plen); if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); @@ -3634,13 +3632,8 @@ onlymerged: tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; - /* Yak, is it really working this way? Some helper please? */ - skb->len -= shiftlen; - skb->data_len -= shiftlen; - skb->truesize -= shiftlen; - tgt->len += shiftlen; - tgt->data_len += shiftlen; - tgt->truesize += shiftlen; + skb_len_add(skb, -shiftlen); + skb_len_add(tgt, shiftlen); return shiftlen; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b21238df3301..7eae8d686e20 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,9 +502,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * nfrags++; - skb->len += tailen; - skb->data_len += tailen; - skb->truesize += tailen; + skb_len_add(skb, tailen); if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 00b4bf26fd93..5e32a2f86fbd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1214,9 +1214,7 @@ alloc_new_skb: pfrag->offset += copy; skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - skb->len += copy; - skb->data_len += copy; - skb->truesize += copy; + skb_len_add(skb, copy); wmem_alloc_delta += copy; } else { err = skb_zerocopy_iter_dgram(skb, from, copy); @@ -1443,9 +1441,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb->csum = csum_block_add(skb->csum, csum, skb->len); } - skb->len += len; - skb->data_len += len; - skb->truesize += len; + skb_len_add(skb, len); refcount_add(len, &sk->sk_wmem_alloc); offset += len; size -= len; -- cgit v1.2.3 From 7dc54d3b8d9100c65b0860f76342fc9f3b4694d9 Mon Sep 17 00:00:00 2001 From: Clément Léger Date: Fri, 24 Jun 2022 16:39:50 +0200 Subject: net: pcs: add Renesas MII converter driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a PCS driver for the MII converter that is present on the Renesas RZ/N1 SoC. This MII converter is reponsible for converting MII to RMII/RGMII or act as a MII pass-trough. Exposing it as a PCS allows to reuse it in both the switch driver and the stmmac driver. Currently, this driver only allows the PCS to be used by the dual Cortex-A7 subsystem since the register locking system is not used. Signed-off-by: Clément Léger Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/pcs/Kconfig | 8 + drivers/net/pcs/Makefile | 1 + drivers/net/pcs/pcs-rzn1-miic.c | 520 ++++++++++++++++++++++++++++++++++++++++ include/linux/pcs-rzn1-miic.h | 18 ++ 4 files changed, 547 insertions(+) create mode 100644 drivers/net/pcs/pcs-rzn1-miic.c create mode 100644 include/linux/pcs-rzn1-miic.h (limited to 'include/linux') diff --git a/drivers/net/pcs/Kconfig b/drivers/net/pcs/Kconfig index f778e5155fae..6289b7c765f1 100644 --- a/drivers/net/pcs/Kconfig +++ b/drivers/net/pcs/Kconfig @@ -18,4 +18,12 @@ config PCS_LYNX This module provides helpers to phylink for managing the Lynx PCS which is part of the Layerscape and QorIQ Ethernet SERDES. +config PCS_RZN1_MIIC + tristate "Renesas RZ/N1 MII converter" + depends on OF && (ARCH_RZN1 || COMPILE_TEST) + help + This module provides a driver for the MII converter that is available + on RZ/N1 SoCs. This PCS converts MII to RMII/RGMII or can be set in + pass-through mode for MII. + endmenu diff --git a/drivers/net/pcs/Makefile b/drivers/net/pcs/Makefile index 0603d469bd57..0ff5388fcdea 100644 --- a/drivers/net/pcs/Makefile +++ b/drivers/net/pcs/Makefile @@ -5,3 +5,4 @@ pcs_xpcs-$(CONFIG_PCS_XPCS) := pcs-xpcs.o pcs-xpcs-nxp.o obj-$(CONFIG_PCS_XPCS) += pcs_xpcs.o obj-$(CONFIG_PCS_LYNX) += pcs-lynx.o +obj-$(CONFIG_PCS_RZN1_MIIC) += pcs-rzn1-miic.o diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c new file mode 100644 index 000000000000..8f5e910f443d --- /dev/null +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MIIC_PRCMD 0x0 +#define MIIC_ESID_CODE 0x4 + +#define MIIC_MODCTRL 0x20 +#define MIIC_MODCTRL_SW_MODE GENMASK(4, 0) + +#define MIIC_CONVCTRL(port) (0x100 + (port) * 4) + +#define MIIC_CONVCTRL_CONV_SPEED GENMASK(1, 0) +#define CONV_MODE_10MBPS 0 +#define CONV_MODE_100MBPS 1 +#define CONV_MODE_1000MBPS 2 + +#define MIIC_CONVCTRL_CONV_MODE GENMASK(3, 2) +#define CONV_MODE_MII 0 +#define CONV_MODE_RMII 1 +#define CONV_MODE_RGMII 2 + +#define MIIC_CONVCTRL_FULLD BIT(8) +#define MIIC_CONVCTRL_RGMII_LINK BIT(12) +#define MIIC_CONVCTRL_RGMII_DUPLEX BIT(13) +#define MIIC_CONVCTRL_RGMII_SPEED GENMASK(15, 14) + +#define MIIC_CONVRST 0x114 +#define MIIC_CONVRST_PHYIF_RST(port) BIT(port) +#define MIIC_CONVRST_PHYIF_RST_MASK GENMASK(4, 0) + +#define MIIC_SWCTRL 0x304 +#define MIIC_SWDUPC 0x308 + +#define MIIC_MAX_NR_PORTS 5 + +#define MIIC_MODCTRL_CONF_CONV_NUM 6 +#define MIIC_MODCTRL_CONF_NONE -1 + +/** + * struct modctrl_match - Matching table entry for convctrl configuration + * See section 8.2.1 of manual. + * @mode_cfg: Configuration value for convctrl + * @conv: Configuration of ethernet port muxes. First index is SWITCH_PORTIN, + * then index 1 - 5 are CONV1 - CONV5. + */ +struct modctrl_match { + u32 mode_cfg; + u8 conv[MIIC_MODCTRL_CONF_CONV_NUM]; +}; + +static struct modctrl_match modctrl_match_table[] = { + {0x0, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}}, + {0x1, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x2, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x3, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}}, + + {0x8, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}}, + {0x9, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0xA, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0xB, {MIIC_RTOS_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}}, + + {0x10, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SERCOS_PORTB, MIIC_SERCOS_PORTA}}, + {0x11, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x12, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_ETHERCAT_PORTC, MIIC_ETHERCAT_PORTB, MIIC_ETHERCAT_PORTA}}, + {0x13, {MIIC_GMAC2_PORT, MIIC_GMAC1_PORT, MIIC_SWITCH_PORTD, + MIIC_SWITCH_PORTC, MIIC_SWITCH_PORTB, MIIC_SWITCH_PORTA}} +}; + +static const char * const conf_to_string[] = { + [MIIC_GMAC1_PORT] = "GMAC1_PORT", + [MIIC_GMAC2_PORT] = "GMAC2_PORT", + [MIIC_RTOS_PORT] = "RTOS_PORT", + [MIIC_SERCOS_PORTA] = "SERCOS_PORTA", + [MIIC_SERCOS_PORTB] = "SERCOS_PORTB", + [MIIC_ETHERCAT_PORTA] = "ETHERCAT_PORTA", + [MIIC_ETHERCAT_PORTB] = "ETHERCAT_PORTB", + [MIIC_ETHERCAT_PORTC] = "ETHERCAT_PORTC", + [MIIC_SWITCH_PORTA] = "SWITCH_PORTA", + [MIIC_SWITCH_PORTB] = "SWITCH_PORTB", + [MIIC_SWITCH_PORTC] = "SWITCH_PORTC", + [MIIC_SWITCH_PORTD] = "SWITCH_PORTD", + [MIIC_HSR_PORTA] = "HSR_PORTA", + [MIIC_HSR_PORTB] = "HSR_PORTB", +}; + +static const char *index_to_string[MIIC_MODCTRL_CONF_CONV_NUM] = { + "SWITCH_PORTIN", + "CONV1", + "CONV2", + "CONV3", + "CONV4", + "CONV5", +}; + +/** + * struct miic - MII converter structure + * @base: base address of the MII converter + * @dev: Device associated to the MII converter + * @clks: Clocks used for this device + * @nclk: Number of clocks + * @lock: Lock used for read-modify-write access + */ +struct miic { + void __iomem *base; + struct device *dev; + struct clk_bulk_data *clks; + int nclk; + spinlock_t lock; +}; + +/** + * struct miic_port - Per port MII converter struct + * @miic: backiling to MII converter structure + * @pcs: PCS structure associated to the port + * @port: port number + */ +struct miic_port { + struct miic *miic; + struct phylink_pcs pcs; + int port; +}; + +static struct miic_port *phylink_pcs_to_miic_port(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct miic_port, pcs); +} + +static void miic_reg_writel(struct miic *miic, int offset, u32 value) +{ + writel(value, miic->base + offset); +} + +static u32 miic_reg_readl(struct miic *miic, int offset) +{ + return readl(miic->base + offset); +} + +static void miic_reg_rmw(struct miic *miic, int offset, u32 mask, u32 val) +{ + u32 reg; + + spin_lock(&miic->lock); + + reg = miic_reg_readl(miic, offset); + reg &= ~mask; + reg |= val; + miic_reg_writel(miic, offset, reg); + + spin_unlock(&miic->lock); +} + +static void miic_converter_enable(struct miic *miic, int port, int enable) +{ + u32 val = 0; + + if (enable) + val = MIIC_CONVRST_PHYIF_RST(port); + + miic_reg_rmw(miic, MIIC_CONVRST, MIIC_CONVRST_PHYIF_RST(port), val); +} + +static int miic_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, bool permit) +{ + struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); + struct miic *miic = miic_port->miic; + int port = miic_port->port; + u32 speed, conv_mode, val; + + switch (interface) { + case PHY_INTERFACE_MODE_RMII: + conv_mode = CONV_MODE_RMII; + speed = CONV_MODE_100MBPS; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + conv_mode = CONV_MODE_RGMII; + speed = CONV_MODE_1000MBPS; + break; + case PHY_INTERFACE_MODE_MII: + conv_mode = CONV_MODE_MII; + /* When in MII mode, speed should be set to 0 (which is actually + * CONV_MODE_10MBPS) + */ + speed = CONV_MODE_10MBPS; + break; + default: + return -EOPNOTSUPP; + } + + val = FIELD_PREP(MIIC_CONVCTRL_CONV_MODE, conv_mode) | + FIELD_PREP(MIIC_CONVCTRL_CONV_SPEED, speed); + + miic_reg_rmw(miic, MIIC_CONVCTRL(port), + MIIC_CONVCTRL_CONV_MODE | MIIC_CONVCTRL_CONV_SPEED, val); + miic_converter_enable(miic_port->miic, miic_port->port, 1); + + return 0; +} + +static void miic_link_up(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex) +{ + struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); + struct miic *miic = miic_port->miic; + u32 conv_speed = 0, val = 0; + int port = miic_port->port; + + if (duplex == DUPLEX_FULL) + val |= MIIC_CONVCTRL_FULLD; + + /* No speed in MII through-mode */ + if (interface != PHY_INTERFACE_MODE_MII) { + switch (speed) { + case SPEED_1000: + conv_speed = CONV_MODE_1000MBPS; + break; + case SPEED_100: + conv_speed = CONV_MODE_100MBPS; + break; + case SPEED_10: + conv_speed = CONV_MODE_10MBPS; + break; + default: + return; + } + } + + val |= FIELD_PREP(MIIC_CONVCTRL_CONV_SPEED, conv_speed); + + miic_reg_rmw(miic, MIIC_CONVCTRL(port), + (MIIC_CONVCTRL_CONV_SPEED | MIIC_CONVCTRL_FULLD), val); +} + +static int miic_validate(struct phylink_pcs *pcs, unsigned long *supported, + const struct phylink_link_state *state) +{ + if (phy_interface_mode_is_rgmii(state->interface) || + state->interface == PHY_INTERFACE_MODE_RMII || + state->interface == PHY_INTERFACE_MODE_MII) + return 1; + + return -EINVAL; +} + +static const struct phylink_pcs_ops miic_phylink_ops = { + .pcs_validate = miic_validate, + .pcs_config = miic_config, + .pcs_link_up = miic_link_up, +}; + +struct phylink_pcs *miic_create(struct device *dev, struct device_node *np) +{ + struct platform_device *pdev; + struct miic_port *miic_port; + struct device_node *pcs_np; + struct miic *miic; + u32 port; + + if (!of_device_is_available(np)) + return ERR_PTR(-ENODEV); + + if (of_property_read_u32(np, "reg", &port)) + return ERR_PTR(-EINVAL); + + if (port > MIIC_MAX_NR_PORTS || port < 1) + return ERR_PTR(-EINVAL); + + /* The PCS pdev is attached to the parent node */ + pcs_np = of_get_parent(np); + if (!pcs_np) + return ERR_PTR(-ENODEV); + + if (!of_device_is_available(pcs_np)) { + of_node_put(pcs_np); + return ERR_PTR(-ENODEV); + } + + pdev = of_find_device_by_node(pcs_np); + of_node_put(pcs_np); + if (!pdev || !platform_get_drvdata(pdev)) + return ERR_PTR(-EPROBE_DEFER); + + miic_port = kzalloc(sizeof(*miic_port), GFP_KERNEL); + if (!miic_port) + return ERR_PTR(-ENOMEM); + + miic = platform_get_drvdata(pdev); + device_link_add(dev, miic->dev, DL_FLAG_AUTOREMOVE_CONSUMER); + + miic_port->miic = miic; + miic_port->port = port - 1; + miic_port->pcs.ops = &miic_phylink_ops; + + return &miic_port->pcs; +} +EXPORT_SYMBOL(miic_create); + +void miic_destroy(struct phylink_pcs *pcs) +{ + struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); + + miic_converter_enable(miic_port->miic, miic_port->port, 0); + kfree(miic_port); +} +EXPORT_SYMBOL(miic_destroy); + +static int miic_init_hw(struct miic *miic, u32 cfg_mode) +{ + int port; + + /* Unlock write access to accessory registers (cf datasheet). If this + * is going to be used in conjunction with the Cortex-M3, this sequence + * will have to be moved in register write + */ + miic_reg_writel(miic, MIIC_PRCMD, 0x00A5); + miic_reg_writel(miic, MIIC_PRCMD, 0x0001); + miic_reg_writel(miic, MIIC_PRCMD, 0xFFFE); + miic_reg_writel(miic, MIIC_PRCMD, 0x0001); + + miic_reg_writel(miic, MIIC_MODCTRL, + FIELD_PREP(MIIC_MODCTRL_SW_MODE, cfg_mode)); + + for (port = 0; port < MIIC_MAX_NR_PORTS; port++) { + miic_converter_enable(miic, port, 0); + /* Disable speed/duplex control from these registers, datasheet + * says switch registers should be used to setup switch port + * speed and duplex. + */ + miic_reg_writel(miic, MIIC_SWCTRL, 0x0); + miic_reg_writel(miic, MIIC_SWDUPC, 0x0); + } + + return 0; +} + +static bool miic_modctrl_match(s8 table_val[MIIC_MODCTRL_CONF_CONV_NUM], + s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM]) +{ + int i; + + for (i = 0; i < MIIC_MODCTRL_CONF_CONV_NUM; i++) { + if (dt_val[i] == MIIC_MODCTRL_CONF_NONE) + continue; + + if (dt_val[i] != table_val[i]) + return false; + } + + return true; +} + +static void miic_dump_conf(struct device *dev, + s8 conf[MIIC_MODCTRL_CONF_CONV_NUM]) +{ + const char *conf_name; + int i; + + for (i = 0; i < MIIC_MODCTRL_CONF_CONV_NUM; i++) { + if (conf[i] != MIIC_MODCTRL_CONF_NONE) + conf_name = conf_to_string[conf[i]]; + else + conf_name = "NONE"; + + dev_err(dev, "%s: %s\n", index_to_string[i], conf_name); + } +} + +static int miic_match_dt_conf(struct device *dev, + s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM], + u32 *mode_cfg) +{ + struct modctrl_match *table_entry; + int i; + + for (i = 0; i < ARRAY_SIZE(modctrl_match_table); i++) { + table_entry = &modctrl_match_table[i]; + + if (miic_modctrl_match(table_entry->conv, dt_val)) { + *mode_cfg = table_entry->mode_cfg; + return 0; + } + } + + dev_err(dev, "Failed to apply requested configuration\n"); + miic_dump_conf(dev, dt_val); + + return -EINVAL; +} + +static int miic_parse_dt(struct device *dev, u32 *mode_cfg) +{ + s8 dt_val[MIIC_MODCTRL_CONF_CONV_NUM]; + struct device_node *np = dev->of_node; + struct device_node *conv; + u32 conf; + int port; + + memset(dt_val, MIIC_MODCTRL_CONF_NONE, sizeof(dt_val)); + + if (of_property_read_u32(np, "renesas,miic-switch-portin", &conf) == 0) + dt_val[0] = conf; + + for_each_child_of_node(np, conv) { + if (of_property_read_u32(conv, "reg", &port)) + continue; + + if (!of_device_is_available(conv)) + continue; + + if (of_property_read_u32(conv, "renesas,miic-input", &conf) == 0) + dt_val[port] = conf; + } + + return miic_match_dt_conf(dev, dt_val, mode_cfg); +} + +static int miic_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct miic *miic; + u32 mode_cfg; + int ret; + + ret = miic_parse_dt(dev, &mode_cfg); + if (ret < 0) + return ret; + + miic = devm_kzalloc(dev, sizeof(*miic), GFP_KERNEL); + if (!miic) + return -ENOMEM; + + spin_lock_init(&miic->lock); + miic->dev = dev; + miic->base = devm_platform_ioremap_resource(pdev, 0); + if (!miic->base) + return -EINVAL; + + ret = devm_pm_runtime_enable(dev); + if (ret < 0) + return ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + ret = miic_init_hw(miic, mode_cfg); + if (ret) + goto disable_runtime_pm; + + /* miic_create() relies on that fact that data are attached to the + * platform device to determine if the driver is ready so this needs to + * be the last thing to be done after everything is initialized + * properly. + */ + platform_set_drvdata(pdev, miic); + + return 0; + +disable_runtime_pm: + pm_runtime_put(dev); + + return ret; +} + +static int miic_remove(struct platform_device *pdev) +{ + pm_runtime_put(&pdev->dev); + + return 0; +} + +static const struct of_device_id miic_of_mtable[] = { + { .compatible = "renesas,rzn1-miic" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, miic_of_mtable); + +static struct platform_driver miic_driver = { + .driver = { + .name = "rzn1_miic", + .suppress_bind_attrs = true, + .of_match_table = miic_of_mtable, + }, + .probe = miic_probe, + .remove = miic_remove, +}; +module_platform_driver(miic_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Renesas MII converter PCS driver"); +MODULE_AUTHOR("Clément Léger "); diff --git a/include/linux/pcs-rzn1-miic.h b/include/linux/pcs-rzn1-miic.h new file mode 100644 index 000000000000..56d12b21365d --- /dev/null +++ b/include/linux/pcs-rzn1-miic.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Schneider Electric + * + * Clément Léger + */ + +#ifndef __LINUX_PCS_MIIC_H +#define __LINUX_PCS_MIIC_H + +struct phylink; +struct device_node; + +struct phylink_pcs *miic_create(struct device *dev, struct device_node *np); + +void miic_destroy(struct phylink_pcs *pcs); + +#endif /* __LINUX_PCS_MIIC_H */ -- cgit v1.2.3 From af3f4134006bf3bf894179c08aaf98ed5938cf4e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:04 -0700 Subject: bpf: add bpf_func_t and trampoline helpers I'll be adding lsm cgroup specific helpers that grab trampoline mutex. No functional changes. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-2-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++----- kernel/bpf/trampoline.c | 63 ++++++++++++++++++++++++++----------------------- 2 files changed, 38 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d05e1495a06e..d547be9db75f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,6 +56,8 @@ typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, struct bpf_iter_aux_info *aux); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +typedef unsigned int (*bpf_func_t)(const void *, + const struct bpf_insn *); struct bpf_iter_seq_info { const struct seq_operations *seq_ops; bpf_iter_init_seq_priv_t init_seq_private; @@ -879,8 +881,7 @@ struct bpf_dispatcher { static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, - unsigned int (*bpf_func)(const void *, - const struct bpf_insn *)) + bpf_func_t bpf_func) { return bpf_func(ctx, insnsi); } @@ -909,8 +910,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ - unsigned int (*bpf_func)(const void *, \ - const struct bpf_insn *)) \ + bpf_func_t bpf_func) \ { \ return bpf_func(ctx, insnsi); \ } \ @@ -921,8 +921,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ - unsigned int (*bpf_func)(const void *, \ - const struct bpf_insn *)); \ + bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 93c7675f0c9e..5466e15be61f 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -410,7 +410,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; struct bpf_tramp_link *link_exiting; @@ -418,44 +418,33 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline int cnt = 0, i; kind = bpf_attach_type_to_tramp(link->link.prog); - mutex_lock(&tr->mutex); - if (tr->extension_prog) { + if (tr->extension_prog) /* cannot attach fentry/fexit if extension prog is attached. * cannot overwrite extension prog either. */ - err = -EBUSY; - goto out; - } + return -EBUSY; for (i = 0; i < BPF_TRAMP_MAX; i++) cnt += tr->progs_cnt[i]; if (kind == BPF_TRAMP_REPLACE) { /* Cannot attach extension if fentry/fexit are in use. */ - if (cnt) { - err = -EBUSY; - goto out; - } + if (cnt) + return -EBUSY; tr->extension_prog = link->link.prog; - err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, - link->link.prog->bpf_func); - goto out; - } - if (cnt >= BPF_MAX_TRAMP_LINKS) { - err = -E2BIG; - goto out; + return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, + link->link.prog->bpf_func); } - if (!hlist_unhashed(&link->tramp_hlist)) { + if (cnt >= BPF_MAX_TRAMP_LINKS) + return -E2BIG; + if (!hlist_unhashed(&link->tramp_hlist)) /* prog already linked */ - err = -EBUSY; - goto out; - } + return -EBUSY; hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) { if (link_exiting->link.prog != link->link.prog) continue; /* prog already linked */ - err = -EBUSY; - goto out; + return -EBUSY; } hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); @@ -465,30 +454,44 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; } -out: + return err; +} + +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +{ + int err; + + mutex_lock(&tr->mutex); + err = __bpf_trampoline_link_prog(link, tr); mutex_unlock(&tr->mutex); return err; } -/* bpf_trampoline_unlink_prog() should never fail. */ -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; int err; kind = bpf_attach_type_to_tramp(link->link.prog); - mutex_lock(&tr->mutex); if (kind == BPF_TRAMP_REPLACE) { WARN_ON_ONCE(!tr->extension_prog); err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, tr->extension_prog->bpf_func, NULL); tr->extension_prog = NULL; - goto out; + return err; } hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; - err = bpf_trampoline_update(tr); -out: + return bpf_trampoline_update(tr); +} + +/* bpf_trampoline_unlink_prog() should never fail. */ +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +{ + int err; + + mutex_lock(&tr->mutex); + err = __bpf_trampoline_unlink_prog(link, tr); mutex_unlock(&tr->mutex); return err; } -- cgit v1.2.3 From 00442143a2ab7f1da46fbf4d2a99c85df767d49a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:05 -0700 Subject: bpf: convert cgroup_bpf.progs to hlist This lets us reclaim some space to be used by new cgroup lsm slots. Before: struct cgroup_bpf { struct bpf_prog_array * effective[23]; /* 0 184 */ /* --- cacheline 2 boundary (128 bytes) was 56 bytes ago --- */ struct list_head progs[23]; /* 184 368 */ /* --- cacheline 8 boundary (512 bytes) was 40 bytes ago --- */ u32 flags[23]; /* 552 92 */ /* XXX 4 bytes hole, try to pack */ /* --- cacheline 10 boundary (640 bytes) was 8 bytes ago --- */ struct list_head storages; /* 648 16 */ struct bpf_prog_array * inactive; /* 664 8 */ struct percpu_ref refcnt; /* 672 16 */ struct work_struct release_work; /* 688 32 */ /* size: 720, cachelines: 12, members: 7 */ /* sum members: 716, holes: 1, sum holes: 4 */ /* last cacheline: 16 bytes */ }; After: struct cgroup_bpf { struct bpf_prog_array * effective[23]; /* 0 184 */ /* --- cacheline 2 boundary (128 bytes) was 56 bytes ago --- */ struct hlist_head progs[23]; /* 184 184 */ /* --- cacheline 5 boundary (320 bytes) was 48 bytes ago --- */ u8 flags[23]; /* 368 23 */ /* XXX 1 byte hole, try to pack */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ struct list_head storages; /* 392 16 */ struct bpf_prog_array * inactive; /* 408 8 */ struct percpu_ref refcnt; /* 416 16 */ struct work_struct release_work; /* 432 72 */ /* size: 504, cachelines: 8, members: 7 */ /* sum members: 503, holes: 1, sum holes: 1 */ /* last cacheline: 56 bytes */ }; Suggested-by: Jakub Sitnicki Reviewed-by: Jakub Sitnicki Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-3-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup-defs.h | 4 +-- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 76 ++++++++++++++++++++++++----------------- 3 files changed, 47 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 695d1224a71b..5d268e76d8e6 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -47,8 +47,8 @@ struct cgroup_bpf { * have either zero or one element * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS */ - struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; - u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; + struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; + u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; /* list of cgroup shared storages */ struct list_head storages; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 669d96d074ad..6673acfbf2ef 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -95,7 +95,7 @@ struct bpf_cgroup_link { }; struct bpf_prog_list { - struct list_head node; + struct hlist_node node; struct bpf_prog *prog; struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 7a394f7c205c..4adb4f3ecb7f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -157,11 +157,12 @@ static void cgroup_bpf_release(struct work_struct *work) mutex_lock(&cgroup_mutex); for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) { - struct list_head *progs = &cgrp->bpf.progs[atype]; - struct bpf_prog_list *pl, *pltmp; + struct hlist_head *progs = &cgrp->bpf.progs[atype]; + struct bpf_prog_list *pl; + struct hlist_node *pltmp; - list_for_each_entry_safe(pl, pltmp, progs, node) { - list_del(&pl->node); + hlist_for_each_entry_safe(pl, pltmp, progs, node) { + hlist_del(&pl->node); if (pl->prog) bpf_prog_put(pl->prog); if (pl->link) @@ -217,12 +218,12 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl) /* count number of elements in the list. * it's slow but the list cannot be long */ -static u32 prog_list_length(struct list_head *head) +static u32 prog_list_length(struct hlist_head *head) { struct bpf_prog_list *pl; u32 cnt = 0; - list_for_each_entry(pl, head, node) { + hlist_for_each_entry(pl, head, node) { if (!prog_list_prog(pl)) continue; cnt++; @@ -291,7 +292,7 @@ static int compute_effective_progs(struct cgroup *cgrp, if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) continue; - list_for_each_entry(pl, &p->bpf.progs[atype], node) { + hlist_for_each_entry(pl, &p->bpf.progs[atype], node) { if (!prog_list_prog(pl)) continue; @@ -342,7 +343,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp) cgroup_bpf_get(p); for (i = 0; i < NR; i++) - INIT_LIST_HEAD(&cgrp->bpf.progs[i]); + INIT_HLIST_HEAD(&cgrp->bpf.progs[i]); INIT_LIST_HEAD(&cgrp->bpf.storages); @@ -418,7 +419,7 @@ cleanup: #define BPF_CGROUP_MAX_PROGS 64 -static struct bpf_prog_list *find_attach_entry(struct list_head *progs, +static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs, struct bpf_prog *prog, struct bpf_cgroup_link *link, struct bpf_prog *replace_prog, @@ -428,12 +429,12 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs, /* single-attach case */ if (!allow_multi) { - if (list_empty(progs)) + if (hlist_empty(progs)) return NULL; - return list_first_entry(progs, typeof(*pl), node); + return hlist_entry(progs->first, typeof(*pl), node); } - list_for_each_entry(pl, progs, node) { + hlist_for_each_entry(pl, progs, node) { if (prog && pl->prog == prog && prog != replace_prog) /* disallow attaching the same prog twice */ return ERR_PTR(-EINVAL); @@ -444,7 +445,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs, /* direct prog multi-attach w/ replacement case */ if (replace_prog) { - list_for_each_entry(pl, progs, node) { + hlist_for_each_entry(pl, progs, node) { if (pl->prog == replace_prog) /* a match found */ return pl; @@ -480,7 +481,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; enum cgroup_bpf_attach_type atype; struct bpf_prog_list *pl; - struct list_head *progs; + struct hlist_head *progs; int err; if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) || @@ -503,7 +504,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, if (!hierarchy_allows_attach(cgrp, atype)) return -EPERM; - if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags) + if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags) /* Disallow attaching non-overridable on top * of existing overridable in this cgroup. * Disallow attaching multi-prog if overridable or none @@ -525,12 +526,22 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, if (pl) { old_prog = pl->prog; } else { + struct hlist_node *last = NULL; + pl = kmalloc(sizeof(*pl), GFP_KERNEL); if (!pl) { bpf_cgroup_storages_free(new_storage); return -ENOMEM; } - list_add_tail(&pl->node, progs); + if (hlist_empty(progs)) + hlist_add_head(&pl->node, progs); + else + hlist_for_each(last, progs) { + if (last->next) + continue; + hlist_add_behind(&pl->node, last); + break; + } } pl->prog = prog; @@ -556,7 +567,7 @@ cleanup: } bpf_cgroup_storages_free(new_storage); if (!old_prog) { - list_del(&pl->node); + hlist_del(&pl->node); kfree(pl); } return err; @@ -587,7 +598,7 @@ static void replace_effective_prog(struct cgroup *cgrp, struct cgroup_subsys_state *css; struct bpf_prog_array *progs; struct bpf_prog_list *pl; - struct list_head *head; + struct hlist_head *head; struct cgroup *cg; int pos; @@ -603,7 +614,7 @@ static void replace_effective_prog(struct cgroup *cgrp, continue; head = &cg->bpf.progs[atype]; - list_for_each_entry(pl, head, node) { + hlist_for_each_entry(pl, head, node) { if (!prog_list_prog(pl)) continue; if (pl->link == link) @@ -637,7 +648,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, enum cgroup_bpf_attach_type atype; struct bpf_prog *old_prog; struct bpf_prog_list *pl; - struct list_head *progs; + struct hlist_head *progs; bool found = false; atype = to_cgroup_bpf_attach_type(link->type); @@ -649,7 +660,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, if (link->link.prog->type != new_prog->type) return -EINVAL; - list_for_each_entry(pl, progs, node) { + hlist_for_each_entry(pl, progs, node) { if (pl->link == link) { found = true; break; @@ -688,7 +699,7 @@ out_unlock: return ret; } -static struct bpf_prog_list *find_detach_entry(struct list_head *progs, +static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs, struct bpf_prog *prog, struct bpf_cgroup_link *link, bool allow_multi) @@ -696,14 +707,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, struct bpf_prog_list *pl; if (!allow_multi) { - if (list_empty(progs)) + if (hlist_empty(progs)) /* report error when trying to detach and nothing is attached */ return ERR_PTR(-ENOENT); /* to maintain backward compatibility NONE and OVERRIDE cgroups * allow detaching with invalid FD (prog==NULL) in legacy mode */ - return list_first_entry(progs, typeof(*pl), node); + return hlist_entry(progs->first, typeof(*pl), node); } if (!prog && !link) @@ -713,7 +724,7 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, return ERR_PTR(-EINVAL); /* find the prog or link and detach it */ - list_for_each_entry(pl, progs, node) { + hlist_for_each_entry(pl, progs, node) { if (pl->prog == prog && pl->link == link) return pl; } @@ -737,7 +748,7 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, struct cgroup_subsys_state *css; struct bpf_prog_array *progs; struct bpf_prog_list *pl; - struct list_head *head; + struct hlist_head *head; struct cgroup *cg; int pos; @@ -754,7 +765,7 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, continue; head = &cg->bpf.progs[atype]; - list_for_each_entry(pl, head, node) { + hlist_for_each_entry(pl, head, node) { if (!prog_list_prog(pl)) continue; if (pl->prog == prog && pl->link == link) @@ -791,7 +802,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum cgroup_bpf_attach_type atype; struct bpf_prog *old_prog; struct bpf_prog_list *pl; - struct list_head *progs; + struct hlist_head *progs; u32 flags; atype = to_cgroup_bpf_attach_type(type); @@ -822,9 +833,10 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, } /* now can actually delete it from this cgroup list */ - list_del(&pl->node); + hlist_del(&pl->node); + kfree(pl); - if (list_empty(progs)) + if (hlist_empty(progs)) /* last program was detached, reset flags to zero */ cgrp->bpf.flags[atype] = 0; if (old_prog) @@ -852,7 +864,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, enum bpf_attach_type type = attr->query.attach_type; enum cgroup_bpf_attach_type atype; struct bpf_prog_array *effective; - struct list_head *progs; + struct hlist_head *progs; struct bpf_prog *prog; int cnt, ret = 0, i; u32 flags; @@ -891,7 +903,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, u32 id; i = 0; - list_for_each_entry(pl, progs, node) { + hlist_for_each_entry(pl, progs, node) { prog = prog_list_prog(pl); id = prog->aux->id; if (copy_to_user(prog_ids + i, &id, sizeof(id))) -- cgit v1.2.3 From 69fd337a975c7e690dfe49d9cb4fe5ba1e6db44e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:06 -0700 Subject: bpf: per-cgroup lsm flavor Allow attaching to lsm hooks in the cgroup context. Attaching to per-cgroup LSM works exactly like attaching to other per-cgroup hooks. New BPF_LSM_CGROUP is added to trigger new mode; the actual lsm hook we attach to is signaled via existing attach_btf_id. For the hooks that have 'struct socket' or 'struct sock' as its first argument, we use the cgroup associated with that socket. For the rest, we use 'current' cgroup (this is all on default hierarchy == v2 only). Note that for some hooks that work on 'struct sock' we still take the cgroup from 'current' because some of them work on the socket that hasn't been properly initialized yet. Behind the scenes, we allocate a shim program that is attached to the trampoline and runs cgroup effective BPF programs array. This shim has some rudimentary ref counting and can be shared between several programs attaching to the same lsm hook from different cgroups. Note that this patch bloats cgroup size because we add 211 cgroup_bpf_attach_type(s) for simplicity sake. This will be addressed in the subsequent patch. Also note that we only add non-sleepable flavor for now. To enable sleepable use-cases, bpf_prog_run_array_cg has to grab trace rcu, shim programs have to be freed via trace rcu, cgroup_bpf.effective should be also trace-rcu-managed + maybe some other changes that I'm not aware of. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-4-sdf@google.com Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 24 +++-- include/linux/bpf-cgroup-defs.h | 8 ++ include/linux/bpf-cgroup.h | 7 ++ include/linux/bpf.h | 24 +++++ include/linux/bpf_lsm.h | 13 +++ include/linux/btf_ids.h | 3 +- include/uapi/linux/bpf.h | 1 + kernel/bpf/bpf_lsm.c | 48 ++++++++++ kernel/bpf/btf.c | 11 +++ kernel/bpf/cgroup.c | 136 ++++++++++++++++++++++++--- kernel/bpf/core.c | 2 + kernel/bpf/syscall.c | 10 ++ kernel/bpf/trampoline.c | 198 ++++++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 32 +++++++ tools/include/uapi/linux/bpf.h | 1 + 15 files changed, 498 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2c51ca9f7cec..2f460c67f9c7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1770,6 +1770,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_link *l, int stack_size, int run_ctx_off, bool save_ret) { + void (*exit)(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit; + u64 (*enter)(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter; u8 *prog = *pprog; u8 *jmp_insn; int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); @@ -1788,15 +1792,21 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off); + if (p->aux->sleepable) { + enter = __bpf_prog_enter_sleepable; + exit = __bpf_prog_exit_sleepable; + } else if (p->expected_attach_type == BPF_LSM_CGROUP) { + enter = __bpf_prog_enter_lsm_cgroup; + exit = __bpf_prog_exit_lsm_cgroup; + } + /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: lea rsi, [rbp - ctx_cookie_off] */ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); - if (emit_call(&prog, - p->aux->sleepable ? __bpf_prog_enter_sleepable : - __bpf_prog_enter, prog)) - return -EINVAL; + if (emit_call(&prog, enter, prog)) + return -EINVAL; /* remember prog start time returned by __bpf_prog_enter */ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); @@ -1840,10 +1850,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); /* arg3: lea rdx, [rbp - run_ctx_off] */ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); - if (emit_call(&prog, - p->aux->sleepable ? __bpf_prog_exit_sleepable : - __bpf_prog_exit, prog)) - return -EINVAL; + if (emit_call(&prog, exit, prog)) + return -EINVAL; *pprog = prog; return 0; diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 5d268e76d8e6..b99f8c3e37ea 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -10,6 +10,12 @@ struct bpf_prog_array; +#ifdef CONFIG_BPF_LSM +#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */ +#else +#define CGROUP_LSM_NUM 0 +#endif + enum cgroup_bpf_attach_type { CGROUP_BPF_ATTACH_TYPE_INVALID = -1, CGROUP_INET_INGRESS = 0, @@ -35,6 +41,8 @@ enum cgroup_bpf_attach_type { CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, + CGROUP_LSM_START, + CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, MAX_CGROUP_BPF_ATTACH_TYPE }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 6673acfbf2ef..2bd1b5f8de9b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -23,6 +23,13 @@ struct ctl_table; struct ctl_table_header; struct task_struct; +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, + const struct bpf_insn *insn); +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, + const struct bpf_insn *insn); +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, + const struct bpf_insn *insn); + #ifdef CONFIG_CGROUP_BPF #define CGROUP_ATYPE(type) \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d547be9db75f..77cd613a00bd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -794,6 +794,10 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -1060,6 +1064,7 @@ struct bpf_prog_aux { struct user_struct *user; u64 load_time; /* ns since boottime */ u32 verified_insns; + int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY @@ -1167,6 +1172,11 @@ struct bpf_tramp_link { u64 cookie; }; +struct bpf_shim_tramp_link { + struct bpf_tramp_link link; + struct bpf_trampoline *trampoline; +}; + struct bpf_tracing_link { struct bpf_tramp_link link; enum bpf_attach_type attach_type; @@ -1245,6 +1255,9 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype); +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { @@ -1268,6 +1281,14 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype) +{ + return -EOPNOTSUPP; +} +static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog) +{ +} #endif struct bpf_array { @@ -2368,6 +2389,8 @@ extern const struct bpf_func_proto bpf_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; +extern const struct bpf_func_proto bpf_set_retval_proto; +extern const struct bpf_func_proto bpf_get_retval_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2485,6 +2508,7 @@ int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); +int btf_id_set_index(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 479c101546ad..61787a5f6af9 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -42,6 +42,9 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); +int bpf_lsm_hook_idx(u32 btf_id); +void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); + #else /* !CONFIG_BPF_LSM */ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) @@ -65,6 +68,16 @@ static inline void bpf_inode_storage_free(struct inode *inode) { } +static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, + bpf_func_t *bpf_func) +{ +} + +static inline int bpf_lsm_hook_idx(u32 btf_id) +{ + return -EINVAL; +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 335a19092368..252a4befeab1 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -179,7 +179,8 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket) enum { #define BTF_SOCK_TYPE(name, str) name, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e81362891596..b7479898c879 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -998,6 +998,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, + BPF_LSM_CGROUP, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index c1351df9f7ee..0f72020bfdcf 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -16,6 +16,7 @@ #include #include #include +#include /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached. @@ -35,6 +36,44 @@ BTF_SET_START(bpf_lsm_hooks) #undef LSM_HOOK BTF_SET_END(bpf_lsm_hooks) +/* List of LSM hooks that should operate on 'current' cgroup regardless + * of function signature. + */ +BTF_SET_START(bpf_lsm_current_hooks) +/* operate on freshly allocated sk without any cgroup association */ +BTF_ID(func, bpf_lsm_sk_alloc_security) +BTF_ID(func, bpf_lsm_sk_free_security) +BTF_SET_END(bpf_lsm_current_hooks) + +void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, + bpf_func_t *bpf_func) +{ + const struct btf_param *args; + + if (btf_type_vlen(prog->aux->attach_func_proto) < 1 || + btf_id_set_contains(&bpf_lsm_current_hooks, + prog->aux->attach_btf_id)) { + *bpf_func = __cgroup_bpf_run_lsm_current; + return; + } + + args = btf_params(prog->aux->attach_func_proto); + +#ifdef CONFIG_NET + if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET]) + *bpf_func = __cgroup_bpf_run_lsm_socket; + else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK]) + *bpf_func = __cgroup_bpf_run_lsm_sock; + else +#endif + *bpf_func = __cgroup_bpf_run_lsm_current; +} + +int bpf_lsm_hook_idx(u32 btf_id) +{ + return btf_id_set_index(&bpf_lsm_hooks, btf_id); +} + int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { @@ -158,6 +197,15 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL; case BPF_FUNC_get_attach_cookie: return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL; + case BPF_FUNC_get_local_storage: + return prog->expected_attach_type == BPF_LSM_CGROUP ? + &bpf_get_local_storage_proto : NULL; + case BPF_FUNC_set_retval: + return prog->expected_attach_type == BPF_LSM_CGROUP ? + &bpf_set_retval_proto : NULL; + case BPF_FUNC_get_retval: + return prog->expected_attach_type == BPF_LSM_CGROUP ? + &bpf_get_retval_proto : NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2e2066d6af94..7c1fe422ed3f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5363,6 +5363,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (arg == nr_args) { switch (prog->expected_attach_type) { + case BPF_LSM_CGROUP: case BPF_LSM_MAC: case BPF_TRACE_FEXIT: /* When LSM programs are attached to void LSM hooks @@ -6842,6 +6843,16 @@ static int btf_id_cmp_func(const void *a, const void *b) return *pa - *pb; } +int btf_id_set_index(const struct btf_id_set *set, u32 id) +{ + const u32 *p; + + p = bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func); + if (!p) + return -1; + return p - set->ids; +} + bool btf_id_set_contains(const struct btf_id_set *set, u32 id) { return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 4adb4f3ecb7f..9cf41dd4f96f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -61,6 +63,87 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, return run_ctx.retval; } +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, + const struct bpf_insn *insn) +{ + const struct bpf_prog *shim_prog; + struct sock *sk; + struct cgroup *cgrp; + int ret = 0; + u64 *args; + + args = (u64 *)ctx; + sk = (void *)(unsigned long)args[0]; + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); + + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + if (likely(cgrp)) + ret = bpf_prog_run_array_cg(&cgrp->bpf, + shim_prog->aux->cgroup_atype, + ctx, bpf_prog_run, 0, NULL); + return ret; +} + +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, + const struct bpf_insn *insn) +{ + const struct bpf_prog *shim_prog; + struct socket *sock; + struct cgroup *cgrp; + int ret = 0; + u64 *args; + + args = (u64 *)ctx; + sock = (void *)(unsigned long)args[0]; + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); + + cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data); + if (likely(cgrp)) + ret = bpf_prog_run_array_cg(&cgrp->bpf, + shim_prog->aux->cgroup_atype, + ctx, bpf_prog_run, 0, NULL); + return ret; +} + +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, + const struct bpf_insn *insn) +{ + const struct bpf_prog *shim_prog; + struct cgroup *cgrp; + int ret = 0; + + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); + + /* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */ + cgrp = task_dfl_cgroup(current); + if (likely(cgrp)) + ret = bpf_prog_run_array_cg(&cgrp->bpf, + shim_prog->aux->cgroup_atype, + ctx, bpf_prog_run, 0, NULL); + return ret; +} + +#ifdef CONFIG_BPF_LSM +static enum cgroup_bpf_attach_type +bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) +{ + if (attach_type != BPF_LSM_CGROUP) + return to_cgroup_bpf_attach_type(attach_type); + return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id); +} +#else +static enum cgroup_bpf_attach_type +bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) +{ + if (attach_type != BPF_LSM_CGROUP) + return to_cgroup_bpf_attach_type(attach_type); + return -EOPNOTSUPP; +} +#endif /* CONFIG_BPF_LSM */ + void cgroup_bpf_offline(struct cgroup *cgrp) { cgroup_get(cgrp); @@ -163,10 +246,16 @@ static void cgroup_bpf_release(struct work_struct *work) hlist_for_each_entry_safe(pl, pltmp, progs, node) { hlist_del(&pl->node); - if (pl->prog) + if (pl->prog) { + if (pl->prog->expected_attach_type == BPF_LSM_CGROUP) + bpf_trampoline_unlink_cgroup_shim(pl->prog); bpf_prog_put(pl->prog); - if (pl->link) + } + if (pl->link) { + if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP) + bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog); bpf_cgroup_link_auto_detach(pl->link); + } kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key[atype]); } @@ -479,6 +568,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *old_prog = NULL; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; + struct bpf_prog *new_prog = prog ? : link->link.prog; enum cgroup_bpf_attach_type atype; struct bpf_prog_list *pl; struct hlist_head *progs; @@ -495,7 +585,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, /* replace_prog implies BPF_F_REPLACE, and vice versa */ return -EINVAL; - atype = to_cgroup_bpf_attach_type(type); + atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id); if (atype < 0) return -EINVAL; @@ -549,17 +639,30 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, bpf_cgroup_storages_assign(pl->storage, storage); cgrp->bpf.flags[atype] = saved_flags; + if (type == BPF_LSM_CGROUP) { + err = bpf_trampoline_link_cgroup_shim(new_prog, atype); + if (err) + goto cleanup; + } + err = update_effective_progs(cgrp, atype); if (err) - goto cleanup; + goto cleanup_trampoline; - if (old_prog) + if (old_prog) { + if (type == BPF_LSM_CGROUP) + bpf_trampoline_unlink_cgroup_shim(old_prog); bpf_prog_put(old_prog); - else + } else { static_branch_inc(&cgroup_bpf_enabled_key[atype]); + } bpf_cgroup_storages_link(new_storage, cgrp, type); return 0; +cleanup_trampoline: + if (type == BPF_LSM_CGROUP) + bpf_trampoline_unlink_cgroup_shim(new_prog); + cleanup: if (old_prog) { pl->prog = old_prog; @@ -651,7 +754,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, struct hlist_head *progs; bool found = false; - atype = to_cgroup_bpf_attach_type(link->type); + atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id); if (atype < 0) return -EINVAL; @@ -803,9 +906,15 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_prog *old_prog; struct bpf_prog_list *pl; struct hlist_head *progs; + u32 attach_btf_id = 0; u32 flags; - atype = to_cgroup_bpf_attach_type(type); + if (prog) + attach_btf_id = prog->aux->attach_btf_id; + if (link) + attach_btf_id = link->link.prog->aux->attach_btf_id; + + atype = bpf_cgroup_atype_find(type, attach_btf_id); if (atype < 0) return -EINVAL; @@ -839,8 +948,11 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, if (hlist_empty(progs)) /* last program was detached, reset flags to zero */ cgrp->bpf.flags[atype] = 0; - if (old_prog) + if (old_prog) { + if (type == BPF_LSM_CGROUP) + bpf_trampoline_unlink_cgroup_shim(old_prog); bpf_prog_put(old_prog); + } static_branch_dec(&cgroup_bpf_enabled_key[atype]); return 0; } @@ -999,6 +1111,8 @@ static void bpf_cgroup_link_release(struct bpf_link *link) WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link, cg_link->type)); + if (cg_link->type == BPF_LSM_CGROUP) + bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog); cg = cg_link->cgroup; cg_link->cgroup = NULL; @@ -1343,7 +1457,7 @@ BPF_CALL_0(bpf_get_retval) return ctx->retval; } -static const struct bpf_func_proto bpf_get_retval_proto = { +const struct bpf_func_proto bpf_get_retval_proto = { .func = bpf_get_retval, .gpl_only = false, .ret_type = RET_INTEGER, @@ -1358,7 +1472,7 @@ BPF_CALL_1(bpf_set_retval, int, retval) return 0; } -static const struct bpf_func_proto bpf_set_retval_proto = { +const struct bpf_func_proto bpf_set_retval_proto = { .func = bpf_set_retval, .gpl_only = false, .ret_type = RET_INTEGER, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f023cb399e3f..4cc10b942a3c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2666,6 +2666,8 @@ const struct bpf_func_proto bpf_get_local_storage_proto __weak; const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_snprintf_btf_proto __weak; const struct bpf_func_proto bpf_seq_printf_btf_proto __weak; +const struct bpf_func_proto bpf_set_retval_proto __weak; +const struct bpf_func_proto bpf_get_retval_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7d5af5b99f0d..626b8f7d237b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3416,6 +3416,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_SK_LOOKUP; case BPF_XDP: return BPF_PROG_TYPE_XDP; + case BPF_LSM_CGROUP: + return BPF_PROG_TYPE_LSM; default: return BPF_PROG_TYPE_UNSPEC; } @@ -3469,6 +3471,11 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_LSM: + if (ptype == BPF_PROG_TYPE_LSM && + prog->expected_attach_type != BPF_LSM_CGROUP) + return -EINVAL; + ret = cgroup_bpf_prog_attach(attr, ptype, prog); break; default: @@ -3506,6 +3513,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_LSM: return cgroup_bpf_prog_detach(attr, ptype); default: return -EINVAL; @@ -4540,6 +4548,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = bpf_raw_tp_link_attach(prog, NULL); else if (prog->expected_attach_type == BPF_TRACE_ITER) ret = bpf_iter_link_attach(attr, uattr, prog); + else if (prog->expected_attach_type == BPF_LSM_CGROUP) + ret = cgroup_bpf_link_attach(attr, prog); else ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5466e15be61f..d7c251d7fbcd 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -496,6 +498,177 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin return err; } +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +static void bpf_shim_tramp_link_release(struct bpf_link *link) +{ + struct bpf_shim_tramp_link *shim_link = + container_of(link, struct bpf_shim_tramp_link, link.link); + + /* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */ + if (!shim_link->trampoline) + return; + + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline)); + bpf_trampoline_put(shim_link->trampoline); +} + +static void bpf_shim_tramp_link_dealloc(struct bpf_link *link) +{ + struct bpf_shim_tramp_link *shim_link = + container_of(link, struct bpf_shim_tramp_link, link.link); + + kfree(shim_link); +} + +static const struct bpf_link_ops bpf_shim_tramp_link_lops = { + .release = bpf_shim_tramp_link_release, + .dealloc = bpf_shim_tramp_link_dealloc, +}; + +static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog, + bpf_func_t bpf_func, + int cgroup_atype) +{ + struct bpf_shim_tramp_link *shim_link = NULL; + struct bpf_prog *p; + + shim_link = kzalloc(sizeof(*shim_link), GFP_USER); + if (!shim_link) + return NULL; + + p = bpf_prog_alloc(1, 0); + if (!p) { + kfree(shim_link); + return NULL; + } + + p->jited = false; + p->bpf_func = bpf_func; + + p->aux->cgroup_atype = cgroup_atype; + p->aux->attach_func_proto = prog->aux->attach_func_proto; + p->aux->attach_btf_id = prog->aux->attach_btf_id; + p->aux->attach_btf = prog->aux->attach_btf; + btf_get(p->aux->attach_btf); + p->type = BPF_PROG_TYPE_LSM; + p->expected_attach_type = BPF_LSM_MAC; + bpf_prog_inc(p); + bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC, + &bpf_shim_tramp_link_lops, p); + + return shim_link; +} + +static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr, + bpf_func_t bpf_func) +{ + struct bpf_tramp_link *link; + int kind; + + for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { + hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) { + struct bpf_prog *p = link->link.prog; + + if (p->bpf_func == bpf_func) + return container_of(link, struct bpf_shim_tramp_link, link); + } + } + + return NULL; +} + +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype) +{ + struct bpf_shim_tramp_link *shim_link = NULL; + struct bpf_attach_target_info tgt_info = {}; + struct bpf_trampoline *tr; + bpf_func_t bpf_func; + u64 key; + int err; + + err = bpf_check_attach_target(NULL, prog, NULL, + prog->aux->attach_btf_id, + &tgt_info); + if (err) + return err; + + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, + prog->aux->attach_btf_id); + + bpf_lsm_find_cgroup_shim(prog, &bpf_func); + tr = bpf_trampoline_get(key, &tgt_info); + if (!tr) + return -ENOMEM; + + mutex_lock(&tr->mutex); + + shim_link = cgroup_shim_find(tr, bpf_func); + if (shim_link) { + /* Reusing existing shim attached by the other program. */ + bpf_link_inc(&shim_link->link.link); + + mutex_unlock(&tr->mutex); + bpf_trampoline_put(tr); /* bpf_trampoline_get above */ + return 0; + } + + /* Allocate and install new shim. */ + + shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype); + if (!shim_link) { + err = -ENOMEM; + goto err; + } + + err = __bpf_trampoline_link_prog(&shim_link->link, tr); + if (err) + goto err; + + shim_link->trampoline = tr; + /* note, we're still holding tr refcnt from above */ + + mutex_unlock(&tr->mutex); + + return 0; +err: + mutex_unlock(&tr->mutex); + + if (shim_link) + bpf_link_put(&shim_link->link.link); + + /* have to release tr while _not_ holding its mutex */ + bpf_trampoline_put(tr); /* bpf_trampoline_get above */ + + return err; +} + +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog) +{ + struct bpf_shim_tramp_link *shim_link = NULL; + struct bpf_trampoline *tr; + bpf_func_t bpf_func; + u64 key; + + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, + prog->aux->attach_btf_id); + + bpf_lsm_find_cgroup_shim(prog, &bpf_func); + tr = bpf_trampoline_lookup(key); + if (WARN_ON_ONCE(!tr)) + return; + + mutex_lock(&tr->mutex); + shim_link = cgroup_shim_find(tr, bpf_func); + mutex_unlock(&tr->mutex); + + if (shim_link) + bpf_link_put(&shim_link->link.link); + + bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */ +} +#endif + struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { @@ -628,6 +801,31 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_ rcu_read_unlock(); } +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) + __acquires(RCU) +{ + /* Runtime stats are exported via actual BPF_LSM_CGROUP + * programs, not the shims. + */ + rcu_read_lock(); + migrate_disable(); + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + + return NO_START_TIME; +} + +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) + __releases(RCU) +{ + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + + migrate_enable(); + rcu_read_unlock(); +} + u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) { rcu_read_lock_trace(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4938477912cd..df3ec6b05f05 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7322,6 +7322,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn reg_type_str(env, regs[BPF_REG_1].type)); return -EACCES; } + break; + case BPF_FUNC_set_retval: + if (env->prog->expected_attach_type == BPF_LSM_CGROUP) { + if (!env->prog->aux->attach_func_proto->type) { + /* Make sure programs that attach to void + * hooks don't try to modify return value. + */ + verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); + return -EINVAL; + } + } + break; } if (err) @@ -10527,6 +10539,22 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_SK_LOOKUP: range = tnum_range(SK_DROP, SK_PASS); break; + + case BPF_PROG_TYPE_LSM: + if (env->prog->expected_attach_type != BPF_LSM_CGROUP) { + /* Regular BPF_PROG_TYPE_LSM programs can return + * any value. + */ + return 0; + } + if (!env->prog->aux->attach_func_proto->type) { + /* Make sure programs that attach to void + * hooks don't try to modify return value. + */ + range = tnum_range(1, 1); + } + break; + case BPF_PROG_TYPE_EXT: /* freplace program can return anything as its return value * depends on the to-be-replaced kernel func or bpf program. @@ -10543,6 +10571,9 @@ static int check_return_code(struct bpf_verifier_env *env) if (!tnum_in(range, reg->var_off)) { verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); + if (prog->expected_attach_type == BPF_LSM_CGROUP && + !prog->aux->attach_func_proto->type) + verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); return -EINVAL; } @@ -14902,6 +14933,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, fallthrough; case BPF_MODIFY_RETURN: case BPF_LSM_MAC: + case BPF_LSM_CGROUP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: if (!btf_type_is_func(t)) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e81362891596..b7479898c879 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -998,6 +998,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, + BPF_LSM_CGROUP, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From c0e19f2c9a3edd38e4b1bdae98eb44555d02bc31 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:07 -0700 Subject: bpf: minimize number of allocated lsm slots per program Previous patch adds 1:1 mapping between all 211 LSM hooks and bpf_cgroup program array. Instead of reserving a slot per possible hook, reserve 10 slots per cgroup for lsm programs. Those slots are dynamically allocated on demand and reclaimed. struct cgroup_bpf { struct bpf_prog_array * effective[33]; /* 0 264 */ /* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */ struct hlist_head progs[33]; /* 264 264 */ /* --- cacheline 8 boundary (512 bytes) was 16 bytes ago --- */ u8 flags[33]; /* 528 33 */ /* XXX 7 bytes hole, try to pack */ struct list_head storages; /* 568 16 */ /* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */ struct bpf_prog_array * inactive; /* 584 8 */ struct percpu_ref refcnt; /* 592 16 */ struct work_struct release_work; /* 608 72 */ /* size: 680, cachelines: 11, members: 7 */ /* sum members: 673, holes: 1, sum holes: 7 */ /* last cacheline: 40 bytes */ }; Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-5-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup-defs.h | 3 ++- include/linux/bpf.h | 9 +++++++- include/linux/bpf_lsm.h | 6 ------ kernel/bpf/bpf_lsm.c | 5 ----- kernel/bpf/btf.c | 10 --------- kernel/bpf/cgroup.c | 47 ++++++++++++++++++++++++++++++++++++++++- kernel/bpf/core.c | 7 ++++++ kernel/bpf/trampoline.c | 1 + 8 files changed, 64 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index b99f8c3e37ea..7b121bd780eb 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -11,7 +11,8 @@ struct bpf_prog_array; #ifdef CONFIG_BPF_LSM -#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */ +/* Maximum number of concurrently attachable per-cgroup LSM hooks. */ +#define CGROUP_LSM_NUM 10 #else #define CGROUP_LSM_NUM 0 #endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 77cd613a00bd..5d2afa55c7c3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2508,7 +2508,6 @@ int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); -int btf_id_set_index(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 @@ -2545,4 +2544,12 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); +#ifdef CONFIG_BPF_LSM +void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); +void bpf_cgroup_atype_put(int cgroup_atype); +#else +static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {} +static inline void bpf_cgroup_atype_put(int cgroup_atype) {} +#endif /* CONFIG_BPF_LSM */ + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 61787a5f6af9..4bcf76a9bb06 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -42,7 +42,6 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); -int bpf_lsm_hook_idx(u32 btf_id); void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); #else /* !CONFIG_BPF_LSM */ @@ -73,11 +72,6 @@ static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, { } -static inline int bpf_lsm_hook_idx(u32 btf_id) -{ - return -EINVAL; -} - #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 0f72020bfdcf..83aa431dd52e 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -69,11 +69,6 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, *bpf_func = __cgroup_bpf_run_lsm_current; } -int bpf_lsm_hook_idx(u32 btf_id) -{ - return btf_id_set_index(&bpf_lsm_hooks, btf_id); -} - int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7c1fe422ed3f..8d3c7ab8af46 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6843,16 +6843,6 @@ static int btf_id_cmp_func(const void *a, const void *b) return *pa - *pb; } -int btf_id_set_index(const struct btf_id_set *set, u32 id) -{ - const u32 *p; - - p = bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func); - if (!p) - return -1; - return p - set->ids; -} - bool btf_id_set_contains(const struct btf_id_set *set, u32 id) { return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 9cf41dd4f96f..169cbd0de797 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -127,12 +127,57 @@ unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, } #ifdef CONFIG_BPF_LSM +struct cgroup_lsm_atype { + u32 attach_btf_id; + int refcnt; +}; + +static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM]; + static enum cgroup_bpf_attach_type bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) { + int i; + + lockdep_assert_held(&cgroup_mutex); + if (attach_type != BPF_LSM_CGROUP) return to_cgroup_bpf_attach_type(attach_type); - return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id); + + for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++) + if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id) + return CGROUP_LSM_START + i; + + for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++) + if (cgroup_lsm_atype[i].attach_btf_id == 0) + return CGROUP_LSM_START + i; + + return -E2BIG; + +} + +void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) +{ + int i = cgroup_atype - CGROUP_LSM_START; + + lockdep_assert_held(&cgroup_mutex); + + WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id && + cgroup_lsm_atype[i].attach_btf_id != attach_btf_id); + + cgroup_lsm_atype[i].attach_btf_id = attach_btf_id; + cgroup_lsm_atype[i].refcnt++; +} + +void bpf_cgroup_atype_put(int cgroup_atype) +{ + int i = cgroup_atype - CGROUP_LSM_START; + + mutex_lock(&cgroup_mutex); + if (--cgroup_lsm_atype[i].refcnt <= 0) + cgroup_lsm_atype[i].attach_btf_id = 0; + WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0); + mutex_unlock(&cgroup_mutex); } #else static enum cgroup_bpf_attach_type diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4cc10b942a3c..805c2ad5c793 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -107,6 +107,9 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag fp->aux->prog = fp; fp->jit_requested = ebpf_jit_enabled(); fp->blinding_requested = bpf_jit_blinding_enabled(fp); +#ifdef CONFIG_CGROUP_BPF + aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID; +#endif INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode); mutex_init(&fp->aux->used_maps_mutex); @@ -2569,6 +2572,10 @@ static void bpf_prog_free_deferred(struct work_struct *work) aux = container_of(work, struct bpf_prog_aux, work); #ifdef CONFIG_BPF_SYSCALL bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab); +#endif +#ifdef CONFIG_CGROUP_BPF + if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID) + bpf_cgroup_atype_put(aux->cgroup_atype); #endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index d7c251d7fbcd..6cd226584c33 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -555,6 +555,7 @@ static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog bpf_prog_inc(p); bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC, &bpf_shim_tramp_link_lops, p); + bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype); return shim_link; } -- cgit v1.2.3 From 9113d7e48e9128522b9f5a54dfd30dff10509a92 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:09 -0700 Subject: bpf: expose bpf_{g,s}etsockopt to lsm cgroup I don't see how to make it nice without introducing btf id lists for the hooks where these helpers are allowed. Some LSM hooks work on the locked sockets, some are triggering early and don't grab any locks, so have two lists for now: 1. LSM hooks which trigger under socket lock - minority of the hooks, but ideal case for us, we can expose existing BTF-based helpers 2. LSM hooks which trigger without socket lock, but they trigger early in the socket creation path where it should be safe to do setsockopt without any locks 3. The rest are prohibited. I'm thinking that this use-case might be a good gateway to sleeping lsm cgroup hooks in the future. We can either expose lock/unlock operations (and add tracking to the verifier) or have another set of bpf_setsockopt wrapper that grab the locks and might sleep. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-7-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/bpf_lsm.c | 38 +++++++++++++++++++++++++++++++++ net/core/filter.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 93 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5d2afa55c7c3..2b21f2a3452f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2386,6 +2386,8 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; +extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 83aa431dd52e..d469b7f3deef 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -45,6 +45,24 @@ BTF_ID(func, bpf_lsm_sk_alloc_security) BTF_ID(func, bpf_lsm_sk_free_security) BTF_SET_END(bpf_lsm_current_hooks) +/* List of LSM hooks that trigger while the socket is properly locked. + */ +BTF_SET_START(bpf_lsm_locked_sockopt_hooks) +BTF_ID(func, bpf_lsm_socket_sock_rcv_skb) +BTF_ID(func, bpf_lsm_sock_graft) +BTF_ID(func, bpf_lsm_inet_csk_clone) +BTF_ID(func, bpf_lsm_inet_conn_established) +BTF_SET_END(bpf_lsm_locked_sockopt_hooks) + +/* List of LSM hooks that trigger while the socket is _not_ locked, + * but it's ok to call bpf_{g,s}etsockopt because the socket is still + * in the early init phase. + */ +BTF_SET_START(bpf_lsm_unlocked_sockopt_hooks) +BTF_ID(func, bpf_lsm_socket_post_create) +BTF_ID(func, bpf_lsm_socket_socketpair) +BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks) + void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func) { @@ -201,6 +219,26 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_retval: return prog->expected_attach_type == BPF_LSM_CGROUP ? &bpf_get_retval_proto : NULL; + case BPF_FUNC_setsockopt: + if (prog->expected_attach_type != BPF_LSM_CGROUP) + return NULL; + if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_sk_setsockopt_proto; + if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_unlocked_sk_setsockopt_proto; + return NULL; + case BPF_FUNC_getsockopt: + if (prog->expected_attach_type != BPF_LSM_CGROUP) + return NULL; + if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_sk_getsockopt_proto; + if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks, + prog->aux->attach_btf_id)) + return &bpf_unlocked_sk_getsockopt_proto; + return NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/net/core/filter.c b/net/core/filter.c index 151aa4756bd6..c6941ab0eb52 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5012,8 +5012,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static int _bpf_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) +static int __bpf_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) { char devname[IFNAMSIZ]; int val, valbool; @@ -5024,8 +5024,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (!sk_fullsock(sk)) return -EINVAL; - sock_owned_by_me(sk); - if (level == SOL_SOCKET) { if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) return -EINVAL; @@ -5258,14 +5256,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, return ret; } -static int _bpf_getsockopt(struct sock *sk, int level, int optname, +static int _bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) +{ + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} + +static int __bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) { if (!sk_fullsock(sk)) goto err_clear; - sock_owned_by_me(sk); - if (level == SOL_SOCKET) { if (optlen != sizeof(int)) goto err_clear; @@ -5360,6 +5364,14 @@ err_clear: return -EINVAL; } +static int _bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + return __bpf_getsockopt(sk, level, optname, optval, optlen); +} + BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { @@ -5400,6 +5412,40 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = { + .func = bpf_unlocked_sk_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return __bpf_getsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = { + .func = bpf_unlocked_sk_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, int, level, int, optname, char *, optval, int, optlen) { -- cgit v1.2.3 From c381d02b2fd5f82d2207db1b9b25ff60d0d9c27c Mon Sep 17 00:00:00 2001 From: Yuwei Wang Date: Wed, 29 Jun 2022 08:48:31 +0000 Subject: sysctl: add proc_dointvec_ms_jiffies_minmax add proc_dointvec_ms_jiffies_minmax to fit read msecs value to jiffies with a limited range of values Signed-off-by: Yuwei Wang Signed-off-by: Paolo Abeni --- include/linux/sysctl.h | 2 ++ kernel/sysctl.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80263f7cdb77..17b42ce89d3e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -75,6 +75,8 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e52b6e372c60..85c92e2c2570 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1237,6 +1237,30 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, return 0; } +static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp, + int *valp, int write, void *data) +{ + int tmp, ret; + struct do_proc_dointvec_minmax_conv_param *param = data; + /* + * If writing, first do so via a temporary local int so we can + * bounds-check it before touching *valp. + */ + int *ip = write ? &tmp : valp; + + ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data); + if (ret) + return ret; + + if (write) { + if ((param->min && *param->min > tmp) || + (param->max && *param->max < tmp)) + return -EINVAL; + *valp = tmp; + } + return 0; +} + /** * proc_dointvec_jiffies - read a vector of integers as seconds * @table: the sysctl table @@ -1259,6 +1283,17 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, do_proc_dointvec_jiffies_conv,NULL); } +int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct do_proc_dointvec_minmax_conv_param param = { + .min = (int *) table->extra1, + .max = (int *) table->extra2, + }; + return do_proc_dointvec(table, write, buffer, lenp, ppos, + do_proc_dointvec_ms_jiffies_minmax_conv, ¶m); +} + /** * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds * @table: the sysctl table @@ -1523,6 +1558,12 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, return -ENOSYS; } +int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { -- cgit v1.2.3 From 837ced3a1a5d8bb1a637dd584711f31ae6b54d93 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 28 Jun 2022 17:52:38 +0300 Subject: time64.h: consolidate uses of PSEC_PER_NSEC Time-sensitive networking code needs to work with PTP times expressed in nanoseconds, and with packet transmission times expressed in picoseconds, since those would be fractional at higher than gigabit speed when expressed in nanoseconds. Convert the existing uses in tc-taprio and the ocelot/felix DSA driver to a PSEC_PER_NSEC macro. This macro is placed in include/linux/time64.h as opposed to its relatives (PSEC_PER_SEC etc) from include/vdso/time64.h because the vDSO library does not (yet) need/use it. Cc: Andy Lutomirski Cc: Thomas Gleixner Signed-off-by: Vladimir Oltean Reviewed-by: Vincenzo Frascino # for the vDSO parts Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 5 +++-- include/linux/time64.h | 3 +++ net/sched/sch_taprio.c | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index f51d9d5f0d82..61ed317602e7 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "felix.h" #define VSC9959_NUM_PORTS 6 @@ -1235,7 +1236,7 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) u32 max_sdu; if (min_gate_len[tc] == U64_MAX /* Gate always open */ || - min_gate_len[tc] * 1000 > needed_bit_time_ps) { + min_gate_len[tc] * PSEC_PER_NSEC > needed_bit_time_ps) { /* Setting QMAXSDU_CFG to 0 disables oversized frame * dropping. */ @@ -1249,7 +1250,7 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) * frame, make sure to enable oversize frame dropping * for frames larger than the smallest that would fit. */ - max_sdu = div_u64(min_gate_len[tc] * 1000, + max_sdu = div_u64(min_gate_len[tc] * PSEC_PER_NSEC, picos_per_byte); /* A TC gate may be completely closed, which is a * special case where all packets are oversized. diff --git a/include/linux/time64.h b/include/linux/time64.h index 81b9686a2079..2fb8232cff1d 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -20,6 +20,9 @@ struct itimerspec64 { struct timespec64 it_value; }; +/* Parameters used to convert the timespec values: */ +#define PSEC_PER_NSEC 1000L + /* Located here for timespec[64]_valid_strict */ #define TIME64_MAX ((s64)~((u64)1 << 63)) #define TIME64_MIN (-TIME64_MAX - 1) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b9c71a304d39..0b941dd63d26 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -176,7 +177,7 @@ static ktime_t get_interval_end_time(struct sched_gate_list *sched, static int length_to_duration(struct taprio_sched *q, int len) { - return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); + return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC); } /* Returns the entry corresponding to next available interval. If @@ -551,7 +552,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, - div64_u64((u64)entry->interval * 1000, + div64_u64((u64)entry->interval * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte))); } -- cgit v1.2.3 From 6708be40047789aa3587a3866b782d5cda7b2a31 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Wed, 22 Jun 2022 09:08:20 +0800 Subject: wifi: ieee80211: s1g action frames are not robust S1g action frame with code 22 is not protected so update the robust action frame list. Signed-off-by: Peter Chiu Link: https://lore.kernel.org/r/20220622010820.17522-1-chui-hao.chiu@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 870f659087d6..f386f9ed41f3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4094,6 +4094,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_UNPROT_DMG && *category != WLAN_CATEGORY_VHT && + *category != WLAN_CATEGORY_S1G && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } -- cgit v1.2.3 From 504148fedb854299972d164b001357b888a9193e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Jun 2022 15:07:50 +0000 Subject: net: add skb_[inner_]tcp_all_headers helpers Most drivers use "skb_transport_offset(skb) + tcp_hdrlen(skb)" to compute headers length for a TCP packet, but others use more convoluted (but equivalent) ways. Add skb_tcp_all_headers() and skb_inner_tcp_all_headers() helpers to harmonize this a bit. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 6 ++--- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 9 ++++--- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 8 +++--- drivers/net/ethernet/atheros/atlx/atl1.c | 7 +++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 17 +++++------- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++--- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/brocade/bna/bnad.c | 6 ++--- drivers/net/ethernet/cadence/macb_main.c | 2 +- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 4 +-- drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 6 ++--- drivers/net/ethernet/cisco/enic/enic_main.c | 5 ++-- drivers/net/ethernet/emulex/benet/be_main.c | 6 ++--- drivers/net/ethernet/freescale/fec_main.c | 2 +- drivers/net/ethernet/fungible/funeth/funeth_tx.c | 2 +- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 4 +-- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 6 ++--- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 +-- drivers/net/ethernet/hisilicon/hns3/hns3_trace.h | 3 +-- drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 +- drivers/net/ethernet/intel/e1000/e1000_main.c | 4 +-- drivers/net/ethernet/intel/e1000e/netdev.c | 4 +-- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 2 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- drivers/net/ethernet/marvell/mvneta.c | 4 +-- .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 4 +-- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 4 +-- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 4 +-- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- drivers/net/ethernet/netronome/nfp/nfd3/dp.c | 5 ++-- drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 5 ++-- .../net/ethernet/netronome/nfp/nfp_net_common.c | 7 +++-- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 5 ++-- .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_fp.c | 8 +++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 2 +- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 4 +-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/synopsys/dwc-xlgmac-net.c | 2 +- drivers/net/wireless/ath/wil6210/txrx.c | 4 +-- drivers/net/xen-netback/netback.c | 4 +-- drivers/staging/qlge/qlge_main.c | 2 +- include/linux/tcp.h | 30 ++++++++++++++++++++++ net/tls/tls_device_fallback.c | 6 ++--- 48 files changed, 119 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 2c3dca41d3bd..f7995519bbc8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -573,7 +573,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { - hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + hlen = skb_tcp_all_headers(skb); phead = skb->data; if (unlikely(!skb_pull(skb, hlen))) { ipoib_warn(priv, "linear data too small\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 4d46780fad13..f342bb853189 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1673,12 +1673,10 @@ static int xgbe_prep_tso(struct sk_buff *skb, struct xgbe_packet_data *packet) return ret; if (XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, VXLAN)) { - packet->header_len = skb_inner_transport_offset(skb) + - inner_tcp_hdrlen(skb); + packet->header_len = skb_inner_tcp_all_headers(skb); packet->tcp_header_len = inner_tcp_hdrlen(skb); } else { - packet->header_len = skb_transport_offset(skb) + - tcp_hdrlen(skb); + packet->header_len = skb_tcp_all_headers(skb); packet->tcp_header_len = tcp_hdrlen(skb); } packet->tcp_payload_len = skb->len - packet->header_len; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 24fe967c18cd..948584761e66 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2072,7 +2072,7 @@ static u16 atl1c_cal_tpd_req(const struct sk_buff *skb) tpd_req = skb_shinfo(skb)->nr_frags + 1; if (skb_is_gso(skb)) { - proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + proto_hdr_len = skb_tcp_all_headers(skb); if (proto_hdr_len < skb_headlen(skb)) tpd_req++; if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) @@ -2107,7 +2107,7 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter, if (real_len < skb->len) pskb_trim(skb, real_len); - hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb)); + hdr_len = skb_tcp_all_headers(skb); if (unlikely(skb->len == hdr_len)) { /* only xsum need */ if (netif_msg_tx_queued(adapter)) @@ -2132,7 +2132,7 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter, *tpd = atl1c_get_tpd(adapter, queue); ipv6_hdr(skb)->payload_len = 0; /* check payload == 0 byte ? */ - hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb)); + hdr_len = skb_tcp_all_headers(skb); if (unlikely(skb->len == hdr_len)) { /* only xsum need */ if (netif_msg_tx_queued(adapter)) @@ -2219,7 +2219,8 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter, tso = (tpd->word1 >> TPD_LSO_EN_SHIFT) & TPD_LSO_EN_MASK; if (tso) { /* TSO */ - map_len = hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); + map_len = hdr_len; use_tpd = tpd; buffer_info = atl1c_get_tx_buffer(adapter, use_tpd); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 4fc9e6350c49..57a51fb7746c 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1609,8 +1609,7 @@ static u16 atl1e_cal_tdp_req(const struct sk_buff *skb) if (skb_is_gso(skb)) { if (skb->protocol == htons(ETH_P_IP) || (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6)) { - proto_hdr_len = skb_transport_offset(skb) + - tcp_hdrlen(skb); + proto_hdr_len = skb_tcp_all_headers(skb); if (proto_hdr_len < skb_headlen(skb)) { tpd_req += ((skb_headlen(skb) - proto_hdr_len + MAX_TX_BUF_LEN - 1) >> @@ -1645,7 +1644,7 @@ static int atl1e_tso_csum(struct atl1e_adapter *adapter, if (real_len < skb->len) pskb_trim(skb, real_len); - hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb)); + hdr_len = skb_tcp_all_headers(skb); if (unlikely(skb->len == hdr_len)) { /* only xsum need */ netdev_warn(adapter->netdev, @@ -1713,7 +1712,8 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, segment = (tpd->word3 >> TPD_SEGMENT_EN_SHIFT) & TPD_SEGMENT_EN_MASK; if (segment) { /* TSO */ - map_len = hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); + map_len = hdr_len; use_tpd = tpd; tx_buffer = atl1e_get_tx_buffer(adapter, use_tpd); diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 6a969969d221..ff1fe09abf9f 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2115,7 +2115,7 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb, ntohs(iph->tot_len)); if (real_len < skb->len) pskb_trim(skb, real_len); - hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb)); + hdr_len = skb_tcp_all_headers(skb); if (skb->len == hdr_len) { iph->check = 0; tcp_hdr(skb)->check = @@ -2206,7 +2206,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, retval = (ptpd->word3 >> TPD_SEGMENT_EN_SHIFT) & TPD_SEGMENT_EN_MASK; if (retval) { /* TSO */ - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); buffer_info->length = hdr_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); @@ -2367,8 +2367,7 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, mss = skb_shinfo(skb)->gso_size; if (mss) { if (skb->protocol == htons(ETH_P_IP)) { - proto_hdr_len = (skb_transport_offset(skb) + - tcp_hdrlen(skb)); + proto_hdr_len = skb_tcp_all_headers(skb); if (unlikely(proto_hdr_len > len)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 5729a5ab059d..712b5595bc39 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3421,12 +3421,9 @@ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb, /* Headers length */ if (xmit_type & XMIT_GSO_ENC) - hlen = (int)(skb_inner_transport_header(skb) - - skb->data) + - inner_tcp_hdrlen(skb); + hlen = skb_inner_tcp_all_headers(skb); else - hlen = (int)(skb_transport_header(skb) - - skb->data) + tcp_hdrlen(skb); + hlen = skb_tcp_all_headers(skb); /* Amount of data (w/o headers) on linear part of SKB*/ first_bd_sz = skb_headlen(skb) - hlen; @@ -3534,15 +3531,13 @@ static u8 bnx2x_set_pbd_csum_enc(struct bnx2x *bp, struct sk_buff *skb, ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) & ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW; - return skb_inner_transport_header(skb) + - inner_tcp_hdrlen(skb) - skb->data; + return skb_inner_tcp_all_headers(skb); } /* We support checksum offload for TCP and UDP only. * No need to pass the UDP header length - it's a constant. */ - return skb_inner_transport_header(skb) + - sizeof(struct udphdr) - skb->data; + return skb_inner_transport_offset(skb) + sizeof(struct udphdr); } /** @@ -3568,12 +3563,12 @@ static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb, ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) & ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW; - return skb_transport_header(skb) + tcp_hdrlen(skb) - skb->data; + return skb_tcp_all_headers(skb); } /* We support checksum offload for TCP and UDP only. * No need to pass the UDP header length - it's a constant. */ - return skb_transport_header(skb) + sizeof(struct udphdr) - skb->data; + return skb_transport_offset(skb) + sizeof(struct udphdr); } /* set FW indication according to inner or outer protocols if tunneled */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b474a4fe4039..11d35da61921 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -535,12 +535,9 @@ normal_tx: u32 hdr_len; if (skb->encapsulation) - hdr_len = skb_inner_network_offset(skb) + - skb_inner_network_header_len(skb) + - inner_tcp_hdrlen(skb); + hdr_len = skb_inner_tcp_all_headers(skb); else - hdr_len = skb_transport_offset(skb) + - tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); txbd1->tx_bd_hsize_lflags |= cpu_to_le32(TX_BD_FLAGS_LSO | TX_BD_FLAGS_T_IPID | diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index c28f8cc00d1c..db1e9d810b41 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7944,7 +7944,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) iph = ip_hdr(skb); tcp_opt_len = tcp_optlen(skb); - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb) - ETH_HLEN; + hdr_len = skb_tcp_all_headers(skb) - ETH_HLEN; /* HW/FW can not correctly segment packets that have been * vlan encapsulated. diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index f6fe08df568b..29dd0f93d6c0 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -2823,8 +2823,7 @@ bnad_txq_wi_prepare(struct bnad *bnad, struct bna_tcb *tcb, BNAD_UPDATE_CTR(bnad, tx_skb_mss_too_long); return -EINVAL; } - if (unlikely((gso_size + skb_transport_offset(skb) + - tcp_hdrlen(skb)) >= skb->len)) { + if (unlikely((gso_size + skb_tcp_all_headers(skb)) >= skb->len)) { txqent->hdr.wi.opcode = htons(BNA_TXQ_WI_SEND); txqent->hdr.wi.lso_mss = 0; BNAD_UPDATE_CTR(bnad, tx_skb_tso_too_short); @@ -2872,8 +2871,7 @@ bnad_txq_wi_prepare(struct bnad *bnad, struct bna_tcb *tcb, BNAD_UPDATE_CTR(bnad, tcpcsum_offload); if (unlikely(skb_headlen(skb) < - skb_transport_offset(skb) + - tcp_hdrlen(skb))) { + skb_tcp_all_headers(skb))) { BNAD_UPDATE_CTR(bnad, tx_skb_tcp_hdr); return -EINVAL; } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index d0ea8dbfa213..90a9798424af 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2267,7 +2267,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) /* only queue eth + ip headers separately for UDP */ hdrlen = skb_transport_offset(skb); else - hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdrlen = skb_tcp_all_headers(skb); if (skb_headlen(skb) < hdrlen) { netdev_err(bp->dev, "Error - LSO headers fragmented!!!\n"); /* if this is required, would need to copy to single buffer */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 4367edbdd579..06397cc8bb36 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1261,7 +1261,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, static int nicvf_tso_count_subdescs(struct sk_buff *skb) { struct skb_shared_info *sh = skb_shinfo(skb); - unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + unsigned int sh_len = skb_tcp_all_headers(skb); unsigned int data_len = skb->len - sh_len; unsigned int p_len = sh->gso_size; long f_id = -1; /* id of the current fragment */ @@ -1382,7 +1382,7 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, if (nic->hw_tso && skb_shinfo(skb)->gso_size) { hdr->tso = 1; - hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr->tso_start = skb_tcp_all_headers(skb); hdr->tso_max_paysize = skb_shinfo(skb)->gso_size; /* For non-tunneled pkts, point this to L2 ethertype */ hdr->inner_l3_offset = skb_network_offset(skb) - 2; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index f889f404305c..ee52e3b1d74f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1531,7 +1531,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) #if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) if (cxgb4_is_ktls_skb(skb) && - (skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)))) + (skb->len - skb_tcp_all_headers(skb))) return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev); #endif /* CHELSIO_TLS_DEVICE */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 60b648b46f75..bfee0e4e54b1 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1012,7 +1012,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, /* packet length = eth hdr len + ip hdr len + tcp hdr len * (including options). */ - pktlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + pktlen = skb_tcp_all_headers(skb); ctrl = sizeof(*cpl) + pktlen; len16 = DIV_ROUND_UP(sizeof(*wr) + ctrl, 16); @@ -1907,7 +1907,7 @@ static int chcr_ktls_sw_fallback(struct sk_buff *skb, return 0; th = tcp_hdr(nskb); - skb_offset = skb_transport_offset(nskb) + tcp_hdrlen(nskb); + skb_offset = skb_tcp_all_headers(nskb); data_len = nskb->len - skb_offset; skb_tx_timestamp(nskb); @@ -1938,7 +1938,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long flags; tcp_seq = ntohl(th->seq); - skb_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + skb_offset = skb_tcp_all_headers(skb); skb_data_len = skb->len - skb_offset; data_len = skb_data_len; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 1c81b161de52..372fb7b3a282 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -680,11 +680,10 @@ static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq, skb_frag_t *frag; if (skb->encapsulation) { - hdr_len = skb_inner_transport_header(skb) - skb->data; - hdr_len += inner_tcp_hdrlen(skb); + hdr_len = skb_inner_tcp_all_headers(skb); enic_preload_tcp_csum_encap(skb); } else { - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); enic_preload_tcp_csum(skb); } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 41acd18a3fd2..414362febbb9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -737,9 +737,9 @@ void be_link_status_update(struct be_adapter *adapter, u8 link_status) static int be_gso_hdr_len(struct sk_buff *skb) { if (skb->encapsulation) - return skb_inner_transport_offset(skb) + - inner_tcp_hdrlen(skb); - return skb_transport_offset(skb) + tcp_hdrlen(skb); + return skb_inner_tcp_all_headers(skb); + + return skb_tcp_all_headers(skb); } static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a90275143d87..e8e2aa1e7f01 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -691,7 +691,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq, struct bufdesc *bdp, int index) { struct fec_enet_private *fep = netdev_priv(ndev); - int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int hdr_len = skb_tcp_all_headers(skb); struct bufdesc_ex *ebdp = container_of(bdp, struct bufdesc_ex, desc); void *bufaddr; unsigned long dmabuf; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c index 0a4a590218ba..a97e3af00cb9 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -83,7 +83,7 @@ static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q, const struct fun_ktls_tx_ctx *tls_ctx; u32 datalen, seq; - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + datalen = skb->len - skb_tcp_all_headers(skb); if (!datalen) return skb; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index f7ba616195f3..588d64819ed5 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -386,7 +386,7 @@ static int gve_prep_tso(struct sk_buff *skb) (__force __wsum)htonl(paylen)); /* Compute length of segmentation header. */ - header_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + header_len = skb_tcp_all_headers(skb); break; default: return -EINVAL; @@ -598,9 +598,9 @@ static int gve_num_buffer_descs_needed(const struct sk_buff *skb) */ static bool gve_can_send_tso(const struct sk_buff *skb) { - const int header_len = skb_checksum_start_offset(skb) + tcp_hdrlen(skb); const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - 1; const struct skb_shared_info *shinfo = skb_shinfo(skb); + const int header_len = skb_tcp_all_headers(skb); const int gso_size = shinfo->gso_size; int cur_seg_num_bufs; int cur_seg_size; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 2f0bd21a9082..d94cc8c6681f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -31,8 +31,6 @@ #define HNS_BUFFER_SIZE_2048 2048 #define BD_MAX_SEND_SIZE 8191 -#define SKB_TMP_LEN(SKB) \ - (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, int send_sz, dma_addr_t dma, int frag_end, @@ -94,7 +92,7 @@ static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, HNSV2_TXD_TSE_B, 1); l4_len = tcp_hdrlen(skb); mss = skb_shinfo(skb)->gso_size; - paylen = skb->len - SKB_TMP_LEN(skb); + paylen = skb->len - skb_tcp_all_headers(skb); } } else if (skb->protocol == htons(ETH_P_IPV6)) { hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1); @@ -108,7 +106,7 @@ static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, HNSV2_TXD_TSE_B, 1); l4_len = tcp_hdrlen(skb); mss = skb_shinfo(skb)->gso_size; - paylen = skb->len - SKB_TMP_LEN(skb); + paylen = skb->len - skb_tcp_all_headers(skb); } } desc->tx.ip_offset = ip_offset; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index ae56306400b8..35d70041b9e8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1838,9 +1838,9 @@ static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size, static unsigned int hns3_gso_hdr_len(struct sk_buff *skb) { if (!skb->encapsulation) - return skb_transport_offset(skb) + tcp_hdrlen(skb); + return skb_tcp_all_headers(skb); - return skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + return skb_inner_tcp_all_headers(skb); } /* HW need every continuous max_non_tso_bd_num buffer data to be larger diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h index 5153e5d41bbd..b8a1ecb4b8fb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h @@ -37,8 +37,7 @@ DECLARE_EVENT_CLASS(hns3_skb_template, __entry->gso_segs = skb_shinfo(skb)->gso_segs; __entry->gso_type = skb_shinfo(skb)->gso_type; __entry->hdr_len = skb->encapsulation ? - skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb) : - skb_transport_offset(skb) + tcp_hdrlen(skb); + skb_inner_tcp_all_headers(skb) : skb_tcp_all_headers(skb); __entry->ip_summed = skb->ip_summed; __entry->fraglist = skb_has_frag_list(skb); hns3_shinfo_pack(skb_shinfo(skb), __entry->size); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 8ce3348edf08..5dc302880f5f 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -1617,7 +1617,7 @@ static void write_swqe2_immediate(struct sk_buff *skb, struct ehea_swqe *swqe, * For TSO packets we only copy the headers into the * immediate area. */ - immediate_len = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb); + immediate_len = skb_tcp_all_headers(skb); } if (skb_is_gso(skb) || skb_data_size >= SWQE2_MAX_IMM) { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3f5feb55cfba..23299fc56199 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2708,7 +2708,7 @@ static int e1000_tso(struct e1000_adapter *adapter, if (err < 0) return err; - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); mss = skb_shinfo(skb)->gso_size; if (protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); @@ -3139,7 +3139,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, max_per_txd = min(mss << 2, max_per_txd); max_txd_pwr = fls(max_per_txd) - 1; - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); if (skb->data_len && hdr_len == len) { switch (hw->mac_type) { case e1000_82544: { diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index fa06f68c8c80..38e60def8520 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5474,7 +5474,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); mss = skb_shinfo(skb)->gso_size; if (protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); @@ -5846,7 +5846,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, * points to just header, pull a few bytes of payload from * frags into skb->data */ - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); /* we do this workaround for ES2LAN, but it is un-necessary, * avoiding it could save a lot of cycles */ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index bca53625da33..45be9a1ab6af 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1187,7 +1187,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb) if (err < 0) return err; - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); mss = skb_shinfo(skb)->gso_size; iph = ip_hdr(skb); iph->tot_len = 0; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 57eff4e9e6de..b6be0552a6c1 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -775,7 +775,7 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length, u32 *first_cmd_sts, bool first_desc) { struct mv643xx_eth_private *mp = txq_to_mp(txq); - int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int hdr_len = skb_tcp_all_headers(skb); int tx_index; struct tx_desc *desc; int ret; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 384f5a16753d..0caa2df87c04 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2664,8 +2664,8 @@ err_drop_frame: static inline void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq) { - int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + int hdr_len = skb_tcp_all_headers(skb); struct mvneta_tx_desc *tx_desc; tx_desc = mvneta_txq_next_desc_get(txq); @@ -2727,7 +2727,7 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, if ((txq->count + tso_count_descs(skb)) >= txq->size) return 0; - if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) { + if (skb_headlen(skb) < skb_tcp_all_headers(skb)) { pr_info("*** Is this even possible?\n"); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 3baeafc40807..a18e8efd0f1e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -624,7 +624,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, ext->subdc = NIX_SUBDC_EXT; if (skb_shinfo(skb)->gso_size) { ext->lso = 1; - ext->lso_sb = skb_transport_offset(skb) + tcp_hdrlen(skb); + ext->lso_sb = skb_tcp_all_headers(skb); ext->lso_mps = skb_shinfo(skb)->gso_size; /* Only TSOv4 and TSOv6 GSO offloads are supported */ @@ -931,7 +931,7 @@ static bool is_hw_tso_supported(struct otx2_nic *pfvf, * be correctly modified, hence don't offload such TSO segments. */ - payload_len = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + payload_len = skb->len - skb_tcp_all_headers(skb); last_seg_size = payload_len % skb_shinfo(skb)->gso_size; if (last_seg_size && last_seg_size < 16) return false; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 9f1a7ec0491a..bbea5458000b 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -1863,7 +1863,7 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb, if (mss != 0) { if (!(hw->flags & SKY2_HW_NEW_LE)) - mss += ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb); + mss += skb_tcp_all_headers(skb); if (mss != sky2->tx_last_mss) { le = get_tx_le(sky2, &slot); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index af3b2b59a2a6..43a4102e9c09 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -645,7 +645,7 @@ static int get_real_size(const struct sk_buff *skb, *inline_ok = false; *hopbyhop = 0; if (skb->encapsulation) { - *lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb); + *lso_header_size = skb_inner_tcp_all_headers(skb); } else { /* Detects large IPV6 TCP packets and prepares for removal of * HBH header that has been pushed by ip6_xmit(), @@ -653,7 +653,7 @@ static int get_real_size(const struct sk_buff *skb, */ if (ipv6_has_hopopt_jumbo(skb)) *hopbyhop = sizeof(struct hop_jumbo_hdr); - *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb); + *lso_header_size = skb_tcp_all_headers(skb); } real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE + ALIGN(*lso_header_size - *hopbyhop + 4, DS_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 4b6f0d1ea59a..cc5cb3010e64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -458,7 +458,7 @@ bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, int datalen; u32 seq; - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + datalen = skb->len - skb_tcp_all_headers(skb); if (!datalen) return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 50d14cec4894..64d78fd99c6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -152,14 +152,14 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop) *hopbyhop = 0; if (skb->encapsulation) { - ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + ihs = skb_tcp_all_headers(skb); stats->tso_inner_packets++; stats->tso_inner_bytes += skb->len - ihs; } else { if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { ihs = skb_transport_offset(skb) + sizeof(struct udphdr); } else { - ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + ihs = skb_tcp_all_headers(skb); if (ipv6_has_hopopt_jumbo(skb)) { *hopbyhop = sizeof(struct hop_jumbo_hdr); ihs -= sizeof(struct hop_jumbo_hdr); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 61497c3e4cfb..971dde8c3286 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2692,7 +2692,7 @@ again: * send loop that we are still in the * header portion of the TSO packet. * TSO header can be at most 1KB long */ - cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); + cum_len = -skb_tcp_all_headers(skb); /* for IPv6 TSO, the checksum offset stores the * TCP header length, to save the firmware from diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index f9410d59146d..8a9d36619659 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -81,12 +81,11 @@ nfp_nfd3_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfd3_tx_buf *txbuf, if (!skb->encapsulation) { l3_offset = skb_network_offset(skb); l4_offset = skb_transport_offset(skb); - hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdrlen = skb_tcp_all_headers(skb); } else { l3_offset = skb_inner_network_offset(skb); l4_offset = skb_inner_transport_offset(skb); - hdrlen = skb_inner_transport_header(skb) - skb->data + - inner_tcp_hdrlen(skb); + hdrlen = skb_inner_tcp_all_headers(skb); } txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index 300637e576a8..85dd376a6adc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -46,12 +46,11 @@ nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf, if (!skb->encapsulation) { l3_offset = skb_network_offset(skb); l4_offset = skb_transport_offset(skb); - hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdrlen = skb_tcp_all_headers(skb); } else { l3_offset = skb_inner_network_offset(skb); l4_offset = skb_inner_transport_offset(skb); - hdrlen = skb_inner_transport_header(skb) - skb->data + - inner_tcp_hdrlen(skb); + hdrlen = skb_inner_tcp_all_headers(skb); } segs = skb_shinfo(skb)->gso_segs; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index bcdd5ab0da5a..3e2ebe42e679 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -598,7 +598,7 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) return skb; - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + datalen = skb->len - skb_tcp_all_headers(skb); seq = ntohl(tcp_hdr(skb)->seq); ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); resync_pending = tls_offload_tx_resync_pending(skb->sk); @@ -666,7 +666,7 @@ void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))) return; - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + datalen = skb->len - skb_tcp_all_headers(skb); seq = ntohl(tcp_hdr(skb)->seq); ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); @@ -1758,8 +1758,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, if (skb_is_gso(skb)) { u32 hdrlen; - hdrlen = skb_inner_transport_header(skb) - skb->data + - inner_tcp_hdrlen(skb); + hdrlen = skb_inner_tcp_all_headers(skb); /* Assume worst case scenario of having longest possible * metadata prepend - 8B diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index f54035455ad6..c03986bf2628 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -947,10 +947,9 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) } if (encap) - hdrlen = skb_inner_transport_header(skb) - skb->data + - inner_tcp_hdrlen(skb); + hdrlen = skb_inner_tcp_all_headers(skb); else - hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdrlen = skb_tcp_all_headers(skb); tso_rem = len; seg_rem = min(tso_rem, hdrlen + mss); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 07dd3c3b1771..4e6f00af17d9 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1877,7 +1877,7 @@ netxen_tso_check(struct net_device *netdev, if ((netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && skb_shinfo(skb)->gso_size > 0) { - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); first_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); first_desc->total_hdr_length = hdr_len; diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index b7cc36589f59..7c2af482192d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -260,11 +260,9 @@ static int map_frag_to_bd(struct qede_tx_queue *txq, static u16 qede_get_skb_hlen(struct sk_buff *skb, bool is_encap_pkt) { if (is_encap_pkt) - return (skb_inner_transport_header(skb) + - inner_tcp_hdrlen(skb) - skb->data); - else - return (skb_transport_header(skb) + - tcp_hdrlen(skb) - skb->data); + return skb_inner_tcp_all_headers(skb); + + return skb_tcp_all_headers(skb); } /* +2 for 1st BD for headers and 2nd BD for headlen (if required) */ diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 8d43ca282956..9da5e97f8a0a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -497,7 +497,7 @@ set_flags: } opcode = QLCNIC_TX_ETHER_PKT; if (skb_is_gso(skb)) { - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); first_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); first_desc->hdr_length = hdr_len; opcode = (protocol == ETH_P_IPV6) ? QLCNIC_TX_TCP_LSO6 : diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 80c95c331c82..0d80447d4d3b 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1264,7 +1264,7 @@ static int emac_tso_csum(struct emac_adapter *adpt, pskb_trim(skb, pkt_len); } - hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + hdr_len = skb_tcp_all_headers(skb); if (unlikely(skb->len == hdr_len)) { /* we only need to do csum */ netif_warn(adpt, tx_err, adpt->netdev, @@ -1339,7 +1339,7 @@ static void emac_tx_fill_tpd(struct emac_adapter *adpt, /* if Large Segment Offload is (in TCP Segmentation Offload struct) */ if (TPD_LSO(tpd)) { - mapped_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + mapped_len = skb_tcp_all_headers(skb); tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); tpbuf->length = mapped_len; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index fe263cad8248..6f14b00c0b14 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3961,7 +3961,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) proto_hdr_len = skb_transport_offset(skb) + sizeof(struct udphdr); hdr = sizeof(struct udphdr); } else { - proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + proto_hdr_len = skb_tcp_all_headers(skb); hdr = tcp_hdrlen(skb); } diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index d435519236e4..e54ce73396ee 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -81,7 +81,7 @@ static int xlgmac_prep_tso(struct sk_buff *skb, if (ret) return ret; - pkt_info->header_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + pkt_info->header_len = skb_tcp_all_headers(skb); pkt_info->tcp_header_len = tcp_hdrlen(skb); pkt_info->tcp_payload_len = skb->len - pkt_info->header_len; pkt_info->mss = skb_shinfo(skb)->gso_size; diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index 5704defd7be1..237cbd5c5060 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -1782,9 +1782,7 @@ static int __wil_tx_vring_tso(struct wil6210_priv *wil, struct wil6210_vif *vif, } /* Header Length = MAC header len + IP header len + TCP header len*/ - hdrlen = ETH_HLEN + - (int)skb_network_header_len(skb) + - tcp_hdrlen(skb); + hdrlen = skb_tcp_all_headers(skb); gso_type = skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV6 | SKB_GSO_TCPV4); switch (gso_type) { diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index fc61a4418737..a256695fc89e 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1201,9 +1201,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) } mss = skb_shinfo(skb)->gso_size; - hdrlen = skb_transport_header(skb) - - skb_mac_header(skb) + - tcp_hdrlen(skb); + hdrlen = skb_tcp_all_headers(skb); skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len - hdrlen, mss); diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 113a3efd12e9..6cd7fc9589c3 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -2461,7 +2461,7 @@ static int qlge_tso(struct sk_buff *skb, struct qlge_ob_mac_tso_iocb_req *mac_io mac_iocb_ptr->flags3 |= OB_MAC_TSO_IOCB_IC; mac_iocb_ptr->frame_len = cpu_to_le32((u32)skb->len); mac_iocb_ptr->total_hdrs_len = - cpu_to_le16(skb_transport_offset(skb) + tcp_hdrlen(skb)); + cpu_to_le16(skb_tcp_all_headers(skb)); mac_iocb_ptr->net_trans_offset = cpu_to_le16(skb_network_offset(skb) | skb_transport_offset(skb) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1168302b7927..a9fbe22732c3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -46,6 +46,36 @@ static inline unsigned int inner_tcp_hdrlen(const struct sk_buff *skb) return inner_tcp_hdr(skb)->doff * 4; } +/** + * skb_tcp_all_headers - Returns size of all headers for a TCP packet + * @skb: buffer + * + * Used in TX path, for a packet known to be a TCP one. + * + * if (skb_is_gso(skb)) { + * int hlen = skb_tcp_all_headers(skb); + * ... + */ +static inline int skb_tcp_all_headers(const struct sk_buff *skb) +{ + return skb_transport_offset(skb) + tcp_hdrlen(skb); +} + +/** + * skb_inner_tcp_all_headers - Returns size of all headers for an encap TCP packet + * @skb: buffer + * + * Used in TX path, for a packet known to be a TCP one. + * + * if (skb_is_gso(skb) && skb->encapsulation) { + * int hlen = skb_inner_tcp_all_headers(skb); + * ... + */ +static inline int skb_inner_tcp_all_headers(const struct sk_buff *skb) +{ + return skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); +} + static inline unsigned int tcp_optlen(const struct sk_buff *skb) { return (tcp_hdr(skb)->doff - 5) * 4; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index e40bedd112b6..3bae29ae57ca 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -232,7 +232,7 @@ static int fill_sg_in(struct scatterlist *sg_in, s32 *sync_size, int *resync_sgs) { - int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + int tcp_payload_offset = skb_tcp_all_headers(skb); int payload_len = skb->len - tcp_payload_offset; u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); struct tls_record_info *record; @@ -310,8 +310,8 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, struct sk_buff *skb, s32 sync_size, u64 rcd_sn) { - int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); + int tcp_payload_offset = skb_tcp_all_headers(skb); int payload_len = skb->len - tcp_payload_offset; void *buf, *iv, *aad, *dummy_buf; struct aead_request *aead_req; @@ -372,7 +372,7 @@ free_nskb: static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb) { - int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb); + int tcp_payload_offset = skb_tcp_all_headers(skb); struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); int payload_len = skb->len - tcp_payload_offset; -- cgit v1.2.3 From f019679ea5f2ab650c3348a79e7d9c3625f62899 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 30 May 2022 06:07:57 +0300 Subject: net/mlx5: E-switch, Remove dependency between sriov and eswitch mode Currently, there are three eswitch modes, none, legacy and switchdev. None is the default mode. Remove redundant none mode as eswitch mode should always be either legacy mode or switchdev mode. With this patch, there are two behavior changes: 1. Legacy becomes the default mode. When querying eswitch mode using devlink, a valid mode is always returned. 2. When disabling sriov, the eswitch mode will not change, only vfs are unloaded. Signed-off-by: Chris Mi Reviewed-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 87 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 9 ++- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 52 ++++--------- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 10 +-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + .../net/ethernet/mellanox/mlx5/core/sf/devlink.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 3 +- include/linux/mlx5/eswitch.h | 8 +- 8 files changed, 89 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b9a3473f5672..82b62ab1d1d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1152,8 +1152,6 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) { const u32 *out; - WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); - if (num_vfs < 0) return; @@ -1287,7 +1285,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) return 0; abort: - esw->mode = MLX5_ESWITCH_NONE; + esw->mode = MLX5_ESWITCH_LEGACY; if (mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); @@ -1312,13 +1310,13 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (!mlx5_esw_allowed(esw)) return 0; - toggle_lag = esw->mode == MLX5_ESWITCH_NONE; + toggle_lag = !mlx5_esw_is_fdb_created(esw); if (toggle_lag) mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); - if (esw->mode == MLX5_ESWITCH_NONE) { + if (!mlx5_esw_is_fdb_created(esw)) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); } else { enum mlx5_eswitch_vport_event vport_events; @@ -1337,56 +1335,79 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) return ret; } -void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) +/* When disabling sriov, free driver level resources. */ +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) { - struct devlink *devlink = priv_to_devlink(esw->dev); - int old_mode; - - lockdep_assert_held_write(&esw->mode_lock); - - if (esw->mode == MLX5_ESWITCH_NONE) + if (!mlx5_esw_allowed(esw)) return; - esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + down_write(&esw->mode_lock); + /* If driver is unloaded, this function is called twice by remove_one() + * and mlx5_unload(). Prevent the second call. + */ + if (!esw->esw_funcs.num_vfs && !clear_vf) + goto unlock; + + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + /* If disabling sriov in switchdev mode, free meta rules here + * because it depends on num_vfs. + */ + if (esw->mode == MLX5_ESWITCH_OFFLOADS) { + struct devlink *devlink = priv_to_devlink(esw->dev); + + esw_offloads_del_send_to_vport_meta_rules(esw); + devlink_rate_nodes_destroy(devlink); + } + + esw->esw_funcs.num_vfs = 0; + +unlock: + up_write(&esw->mode_lock); +} + +/* Free resources for corresponding eswitch mode. It is called by devlink + * when changing eswitch mode or modprobe when unloading driver. + */ +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + /* Notify eswitch users that it is exiting from current mode. * So that it can do necessary cleanup before the eswitch is disabled. */ - mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_NONE); + mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); mlx5_eswitch_event_handlers_unregister(esw); + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; - if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_legacy_disable(esw); - else if (esw->mode == MLX5_ESWITCH_OFFLOADS) + if (esw->mode == MLX5_ESWITCH_OFFLOADS) esw_offloads_disable(esw); - - old_mode = esw->mode; - esw->mode = MLX5_ESWITCH_NONE; - - if (old_mode == MLX5_ESWITCH_OFFLOADS) - mlx5_rescan_drivers(esw->dev); - - devlink_rate_nodes_destroy(devlink); - + else if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_legacy_disable(esw); mlx5_esw_acls_ns_cleanup(esw); - if (clear_vf) - mlx5_eswitch_clear_vf_vports_info(esw); + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + devlink_rate_nodes_destroy(devlink); } -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { if (!mlx5_esw_allowed(esw)) return; mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); - mlx5_eswitch_disable_locked(esw, clear_vf); - esw->esw_funcs.num_vfs = 0; + mlx5_eswitch_disable_locked(esw); up_write(&esw->mode_lock); mlx5_lag_enable_change(esw->dev); } @@ -1581,7 +1602,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) refcount_set(&esw->qos.refcnt, 0); esw->enabled_vports = 0; - esw->mode = MLX5_ESWITCH_NONE; + esw->mode = MLX5_ESWITCH_LEGACY; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) @@ -1883,7 +1904,7 @@ u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; - return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_NONE; + return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a9ba0e324834..5452437e531f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -342,6 +342,7 @@ void esw_offloads_disable(struct mlx5_eswitch *esw); int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); +void esw_offloads_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw); bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); @@ -357,8 +358,9 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); #define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs); int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); -void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf); -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, const u8 *mac); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -729,7 +731,8 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } -static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2ce3728576d1..7c51011aed2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1040,6 +1040,15 @@ static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) mlx5_del_flow_rules(flows[i]); kvfree(flows); + /* If changing eswitch mode from switchdev to legacy, but num_vfs is not 0, + * meta rules could be freed again. So set it to NULL. + */ + esw->fdb_table.offloads.send_to_vport_meta_rules = NULL; +} + +void esw_offloads_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) +{ + mlx5_eswitch_del_send_to_vport_meta_rules(esw); } static int @@ -2034,7 +2043,7 @@ static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - if (esw->mode == MLX5_ESWITCH_NONE) + if (!mlx5_esw_is_fdb_created(esw)) return -EOPNOTSUPP; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { @@ -2170,7 +2179,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable_locked(esw, false); err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, esw->dev->priv.sriov.num_vfs); if (err) { @@ -2894,7 +2902,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) int err = 0; down_write(&esw->mode_lock); - if (esw->mode != MLX5_ESWITCH_NONE) { + if (mlx5_esw_is_fdb_created(esw)) { err = -EBUSY; goto done; } @@ -3229,7 +3237,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable_locked(esw, false); err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) { @@ -3334,15 +3341,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw) -{ - /* devlink commands in NONE eswitch mode are currently supported only - * on ECPF. - */ - return (esw->mode == MLX5_ESWITCH_NONE && - !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0; -} - /* FIXME: devl_unlock() followed by devl_lock() inside driver callback * is never correct and prone to races. It's a transitional workaround, * never repeat this pattern. @@ -3399,6 +3397,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (cur_mlx5_mode == mlx5_mode) goto unlock; + mlx5_eswitch_disable_locked(esw); if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -3409,6 +3408,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = esw_offloads_start(esw, extack); } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { err = esw_offloads_stop(esw, extack); + mlx5_rescan_drivers(esw->dev); } else { err = -EINVAL; } @@ -3431,12 +3431,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) return PTR_ERR(esw); mlx5_eswtich_mode_callback_enter(devlink, esw); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; - err = esw_mode_to_devlink(esw->mode, mode); -unlock: mlx5_eswtich_mode_callback_exit(devlink, esw); return err; } @@ -3485,9 +3480,6 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, return PTR_ERR(esw); mlx5_eswtich_mode_callback_enter(devlink, esw); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto out; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: @@ -3539,12 +3531,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return PTR_ERR(esw); mlx5_eswtich_mode_callback_enter(devlink, esw); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; - err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); -unlock: mlx5_eswtich_mode_callback_exit(devlink, esw); return err; } @@ -3555,16 +3542,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw; - int err; + int err = 0; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); mlx5_eswtich_mode_callback_enter(devlink, esw); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || @@ -3615,21 +3599,15 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); mlx5_eswtich_mode_callback_enter(devlink, esw); - err = eswitch_devlink_esw_mode_check(esw); - if (err) - goto unlock; - *encap = esw->offloads.encap; -unlock: mlx5_eswtich_mode_callback_exit(devlink, esw); - return err; + return 0; } static bool diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 2a8fc547eb37..641505d2c0c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -632,6 +632,7 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { #ifdef CONFIG_MLX5_ESWITCH + struct mlx5_core_dev *dev; u8 mode; #endif int i; @@ -641,11 +642,11 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; #ifdef CONFIG_MLX5_ESWITCH - mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev); - - if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS) + dev = ldev->pf[MLX5_LAG_P1].dev; + if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev)) return false; + mode = mlx5_eswitch_mode(dev); for (i = 0; i < ldev->ports; i++) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) return false; @@ -760,8 +761,7 @@ static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) #ifdef CONFIG_MLX5_ESWITCH for (i = 0; i < ldev->ports; i++) - roce_lag = roce_lag && - ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE; + roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); #endif return roce_lag; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2078d9f03a5f..a9e51c1b7738 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1250,6 +1250,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) { mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); + mlx5_eswitch_disable(dev->priv.eswitch); mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); mlx5_sf_hw_table_destroy(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 3be659cd91f1..7d955a4d9f14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -501,7 +501,7 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi case MLX5_ESWITCH_OFFLOADS: mlx5_sf_table_enable(table); break; - case MLX5_ESWITCH_NONE: + case MLX5_ESWITCH_LEGACY: mlx5_sf_table_disable(table); break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 2935614f6fa9..5757cd6e1819 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -145,8 +145,7 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) sriov->vfs_ctx[vf].enabled = 0; } - if (MLX5_ESWITCH_MANAGER(dev)) - mlx5_eswitch_disable(dev->priv.eswitch, clear_vf); + mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 8b18fe9771f9..e2701ed0200e 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -12,7 +12,6 @@ #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) enum { - MLX5_ESWITCH_NONE, MLX5_ESWITCH_LEGACY, MLX5_ESWITCH_OFFLOADS }; @@ -153,7 +152,7 @@ struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { - return MLX5_ESWITCH_NONE; + return MLX5_ESWITCH_LEGACY; } static inline enum devlink_eswitch_encap_mode @@ -198,6 +197,11 @@ static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitc #endif /* CONFIG_MLX5_ESWITCH */ +static inline bool is_mdev_legacy_mode(struct mlx5_core_dev *dev) +{ + return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_LEGACY; +} + static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) { return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS; -- cgit v1.2.3 From 036bff2800cbcf8217dd0bc93d8421b5b8f72476 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Tue, 28 Jun 2022 18:31:28 +0200 Subject: can: netlink: dump bitrate 0 if can_priv::bittiming.bitrate is -1U Upcoming changes on slcan driver will require you to specify a bitrate of value -1 to prevent the open_candev() from failing but at the same time highlighting that it is a fake value. In this case the command `ip --details -s -s link show' would print 4294967295 as the bitrate value. The patch change this value in 0. Link: https://lore.kernel.org/all/20220628163137.413025-5-dario.binacchi@amarulasolutions.com Suggested-by: Marc Kleine-Budde Signed-off-by: Dario Binacchi Tested-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/netlink.c | 3 ++- include/linux/can/bittiming.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 037824011266..8efa22d9f214 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -511,7 +511,8 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) if (priv->do_get_state) priv->do_get_state(dev, &state); - if ((priv->bittiming.bitrate && + if ((priv->bittiming.bitrate != CAN_BITRATE_UNSET && + priv->bittiming.bitrate != CAN_BITRATE_UNKNOWN && nla_put(skb, IFLA_CAN_BITTIMING, sizeof(priv->bittiming), &priv->bittiming)) || diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 7ae21c0f7f23..ef0a77173e3c 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -11,6 +11,8 @@ #define CAN_SYNC_SEG 1 +#define CAN_BITRATE_UNSET 0 +#define CAN_BITRATE_UNKNOWN (-1U) #define CAN_CTRLMODE_TDC_MASK \ (CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL) -- cgit v1.2.3 From 39bfb3c12d792181de0cf3306be1ea03664a1b05 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Fri, 1 Jul 2022 19:56:06 +0200 Subject: net: phy: broadcom: Add support for BCM53128 internal PHYs Add support for BCM53128 internal PHYs. These support interrupts as well as statistics. Therefore, enable the Broadcom PHY driver for them. Tested on BCM53128 switch using the mainline b53 DSA driver. Signed-off-by: Kurt Kanzenbach Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 15 +++++++++++++++ include/linux/brcmphy.h | 1 + 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 876bc45ede60..31fbcdddc9ad 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -1066,6 +1066,20 @@ static struct phy_driver broadcom_drivers[] = { .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, .link_change_notify = bcm54xx_link_change_notify, +}, { + .phy_id = PHY_ID_BCM53128, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM53128", + .flags = PHY_IS_INTERNAL, + /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, + .config_init = bcm54xx_config_init, + .config_intr = bcm_phy_config_intr, + .handle_interrupt = bcm_phy_handle_interrupt, + .link_change_notify = bcm54xx_link_change_notify, }, { .phy_id = PHY_ID_BCM89610, .phy_id_mask = 0xfffffff0, @@ -1102,6 +1116,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5241, 0xfffffff0 }, { PHY_ID_BCM5395, 0xfffffff0 }, { PHY_ID_BCM53125, 0xfffffff0 }, + { PHY_ID_BCM53128, 0xfffffff0 }, { PHY_ID_BCM89610, 0xfffffff0 }, { } }; diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 747fad264033..6ff567ece34a 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -16,6 +16,7 @@ #define PHY_ID_BCM5481 0x0143bca0 #define PHY_ID_BCM5395 0x0143bcf0 #define PHY_ID_BCM53125 0x03625f20 +#define PHY_ID_BCM53128 0x03625e10 #define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM54811 0x03625cc0 #define PHY_ID_BCM5482 0x0143bcb0 -- cgit v1.2.3 From 88527790c079fb1ea41cbcfa4450ee37906a2fb0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Jul 2022 16:59:24 -0700 Subject: tls: rx: add sockopt for enabling optimistic decrypt with TLS 1.3 Since optimisitic decrypt may add extra load in case of retries require socket owner to explicitly opt-in. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/tls.rst | 18 ++++++++++ include/linux/sockptr.h | 8 +++++ include/net/tls.h | 3 ++ include/uapi/linux/snmp.h | 1 + include/uapi/linux/tls.h | 2 ++ net/tls/tls_main.c | 75 ++++++++++++++++++++++++++++++++++++++++ net/tls/tls_proc.c | 1 + net/tls/tls_sw.c | 21 +++++++---- 8 files changed, 122 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index be8e10c14b05..7a6643836e42 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -239,6 +239,19 @@ for the original TCP transmission and TCP retransmissions. To the receiver this will look like TLS records had been tampered with and will result in record authentication failures. +TLS_RX_EXPECT_NO_PAD +~~~~~~~~~~~~~~~~~~~~ + +TLS 1.3 only. Expect the sender to not pad records. This allows the data +to be decrypted directly into user space buffers with TLS 1.3. + +This optimization is safe to enable only if the remote end is trusted, +otherwise it is an attack vector to doubling the TLS processing cost. + +If the record decrypted turns out to had been padded or is not a data +record it will be decrypted again into a kernel buffer without zero copy. +Such events are counted in the ``TlsDecryptRetry`` statistic. + Statistics ========== @@ -264,3 +277,8 @@ TLS implementation exposes the following per-namespace statistics - ``TlsDeviceRxResync`` - number of RX resyncs sent to NICs handling cryptography + +- ``TlsDecryptRetry`` - + number of RX records which had to be re-decrypted due to + ``TLS_RX_EXPECT_NO_PAD`` mis-prediction. Note that this counter will + also increment for non-data records. diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index ea193414298b..d45902fb4cad 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -102,4 +102,12 @@ static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) return strncpy_from_user(dst, src.user, count); } +static inline int check_zeroed_sockptr(sockptr_t src, size_t offset, + size_t size) +{ + if (!sockptr_is_kernel(src)) + return check_zeroed_user(src.user + offset, size); + return memchr_inv(src.kernel + offset, 0, size) == NULL; +} + #endif /* _LINUX_SOCKPTR_H */ diff --git a/include/net/tls.h b/include/net/tls.h index 8017f1703447..4fc16ca5f469 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -149,6 +149,7 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 async_capable:1; + u8 zc_capable:1; atomic_t decrypt_pending; /* protect crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; @@ -239,6 +240,7 @@ struct tls_context { u8 tx_conf:3; u8 rx_conf:3; u8 zerocopy_sendfile:1; + u8 rx_no_pad:1; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); @@ -358,6 +360,7 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval, void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 904909d020e2..1c9152add663 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -344,6 +344,7 @@ enum LINUX_MIB_TLSRXDEVICE, /* TlsRxDevice */ LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */ LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ + LINUX_MIN_TLSDECRYPTRETRY, /* TlsDecryptRetry */ __LINUX_MIB_TLSMAX }; diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index bb8f80812b0b..f1157d8f4acd 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -40,6 +40,7 @@ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ #define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ +#define TLS_RX_EXPECT_NO_PAD 4 /* Attempt opportunistic zero-copy */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -162,6 +163,7 @@ enum { TLS_INFO_TXCONF, TLS_INFO_RXCONF, TLS_INFO_ZC_RO_TX, + TLS_INFO_RX_NO_PAD, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 2ffede463e4a..1b3efc96db0b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -533,6 +533,37 @@ static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval, return 0; } +static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + unsigned int value; + int err, len; + + if (ctx->prot_info.version != TLS_1_3_VERSION) + return -EINVAL; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < sizeof(value)) + return -EINVAL; + + lock_sock(sk); + err = -EINVAL; + if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) + value = ctx->rx_no_pad; + release_sock(sk); + if (err) + return err; + + if (put_user(sizeof(value), optlen)) + return -EFAULT; + if (copy_to_user(optval, &value, sizeof(value))) + return -EFAULT; + + return 0; +} + static int do_tls_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { @@ -547,6 +578,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, case TLS_TX_ZEROCOPY_RO: rc = do_tls_getsockopt_tx_zc(sk, optval, optlen); break; + case TLS_RX_EXPECT_NO_PAD: + rc = do_tls_getsockopt_no_pad(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; @@ -718,6 +752,38 @@ static int do_tls_setsockopt_tx_zc(struct sock *sk, sockptr_t optval, return 0; } +static int do_tls_setsockopt_no_pad(struct sock *sk, sockptr_t optval, + unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + u32 val; + int rc; + + if (ctx->prot_info.version != TLS_1_3_VERSION || + sockptr_is_null(optval) || optlen < sizeof(val)) + return -EINVAL; + + rc = copy_from_sockptr(&val, optval, sizeof(val)); + if (rc) + return -EFAULT; + if (val > 1) + return -EINVAL; + rc = check_zeroed_sockptr(optval, sizeof(val), optlen - sizeof(val)); + if (rc < 1) + return rc == 0 ? -EINVAL : rc; + + lock_sock(sk); + rc = -EINVAL; + if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) { + ctx->rx_no_pad = val; + tls_update_rx_zc_capable(ctx); + rc = 0; + } + release_sock(sk); + + return rc; +} + static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { @@ -736,6 +802,9 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, rc = do_tls_setsockopt_tx_zc(sk, optval, optlen); release_sock(sk); break; + case TLS_RX_EXPECT_NO_PAD: + rc = do_tls_setsockopt_no_pad(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; @@ -976,6 +1045,11 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb) if (err) goto nla_failure; } + if (ctx->rx_no_pad) { + err = nla_put_flag(skb, TLS_INFO_RX_NO_PAD); + if (err) + goto nla_failure; + } rcu_read_unlock(); nla_nest_end(skb, start); @@ -997,6 +1071,7 @@ static size_t tls_get_info_size(const struct sock *sk) nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */ nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */ + nla_total_size(0) + /* TLS_INFO_RX_NO_PAD */ 0; return size; diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c index feeceb0e4cb4..0c200000cc45 100644 --- a/net/tls/tls_proc.c +++ b/net/tls/tls_proc.c @@ -18,6 +18,7 @@ static const struct snmp_mib tls_mib_list[] = { SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE), SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR), SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), + SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIN_TLSDECRYPTRETRY), SNMP_MIB_SENTINEL }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 2bac57684429..7592b6519953 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1601,6 +1601,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION && darg->tail != TLS_RECORD_TYPE_DATA)) { darg->zc = false; + TLS_INC_STATS(sock_net(sk), LINUX_MIN_TLSDECRYPTRETRY); return decrypt_skb_update(sk, skb, dest, darg); } @@ -1787,7 +1788,7 @@ int tls_sw_recvmsg(struct sock *sk, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && - prot->version != TLS_1_3_VERSION; + ctx->zc_capable; decrypted = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { struct tls_decrypt_arg darg = {}; @@ -2269,6 +2270,14 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) strp_check_rcv(&rx_ctx->strp); } +void tls_update_rx_zc_capable(struct tls_context *tls_ctx) +{ + struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx); + + rx_ctx->zc_capable = tls_ctx->rx_no_pad || + tls_ctx->prot_info.version != TLS_1_3_VERSION; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2504,12 +2513,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (sw_ctx_rx) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); - if (crypto_info->version == TLS_1_3_VERSION) - sw_ctx_rx->async_capable = 0; - else - sw_ctx_rx->async_capable = - !!(tfm->__crt_alg->cra_flags & - CRYPTO_ALG_ASYNC); + tls_update_rx_zc_capable(ctx); + sw_ctx_rx->async_capable = + crypto_info->version != TLS_1_3_VERSION && + !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); /* Set up strparser */ memset(&cb, 0, sizeof(cb)); -- cgit v1.2.3 From 6976890e8998afd8abbbd9fe27ed71387b24f57f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 11:00:45 +0200 Subject: netfilter: nf_conntrack: add missing __rcu annotations Access to the hook pointers use correct helpers but the pointers lack the needed __rcu annotation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sip.h | 2 +- include/net/netfilter/nf_conntrack_timeout.h | 2 +- net/netfilter/nf_conntrack_pptp.c | 2 +- net/netfilter/nf_conntrack_sip.c | 2 +- net/netfilter/nf_conntrack_timeout.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index c620521c42bc..dbc614dfe0d5 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -164,7 +164,7 @@ struct nf_nat_sip_hooks { unsigned int medialen, union nf_inet_addr *rtp_addr); }; -extern const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +extern const struct nf_nat_sip_hooks __rcu *nf_nat_sip_hooks; int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr, unsigned int datalen, unsigned int *matchoff, diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index fea258983d23..9fdaba911de6 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -105,7 +105,7 @@ struct nf_ct_timeout_hooks { void (*timeout_put)(struct nf_ct_timeout *timeout); }; -extern const struct nf_ct_timeout_hooks *nf_ct_timeout_hook; +extern const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook; #endif #endif /* _NF_CONNTRACK_TIMEOUT_H */ diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index f3fa367b455f..4c679638df06 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -45,7 +45,7 @@ MODULE_ALIAS_NFCT_HELPER("pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); -const struct nf_nat_pptp_hook *nf_nat_pptp_hook; +const struct nf_nat_pptp_hook __rcu *nf_nat_pptp_hook; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook); #if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index b83dc9bf0a5d..a88b43624b27 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -60,7 +60,7 @@ module_param(sip_external_media, int, 0600); MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external " "endpoints (default 0)"); -const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +const struct nf_nat_sip_hooks __rcu *nf_nat_sip_hooks; EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); static int string_len(const struct nf_conn *ct, const char *dptr, diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 0f828d05ea60..821365ed5b2c 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -22,7 +22,7 @@ #include #include -const struct nf_ct_timeout_hooks *nf_ct_timeout_hook __read_mostly; +const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_hook); static int untimeout(struct nf_conn *ct, void *timeout) -- cgit v1.2.3 From d3f2d0a292c24fc624afb2b4f47f838e83775721 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2022 11:00:47 +0200 Subject: netfilter: h323: merge nat hook pointers into one sparse complains about incorrect rcu usage. Code uses the correct rcu access primitives, but the function pointers lack rcu annotations. Collapse all of them into a single structure, then annotate the pointer. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_h323.h | 109 ++++++------ net/ipv4/netfilter/nf_nat_h323.c | 42 ++--- net/netfilter/nf_conntrack_h323_main.c | 260 +++++++++++----------------- 3 files changed, 169 insertions(+), 242 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h index 4561ec0fcea4..9e937f64a1ad 100644 --- a/include/linux/netfilter/nf_conntrack_h323.h +++ b/include/linux/netfilter/nf_conntrack_h323.h @@ -38,60 +38,63 @@ void nf_conntrack_h245_expect(struct nf_conn *new, struct nf_conntrack_expect *this); void nf_conntrack_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this); -extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff, - unsigned char **data, int dataoff, - H245_TransportAddress *taddr, - union nf_inet_addr *addr, - __be16 port); -extern int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff, - unsigned char **data, int dataoff, - TransportAddress *taddr, - union nf_inet_addr *addr, - __be16 port); -extern int (*set_sig_addr_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - TransportAddress *taddr, int count); -extern int (*set_ras_addr_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - TransportAddress *taddr, int count); -extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - int dataoff, - H245_TransportAddress *taddr, - __be16 port, __be16 rtp_port, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp); -extern int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, + +struct nfct_h323_nat_hooks { + int (*set_h245_addr)(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, - H245_TransportAddress *taddr, __be16 port, - struct nf_conntrack_expect *exp); -extern int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, + H245_TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port); + int (*set_h225_addr)(struct sk_buff *skb, unsigned int protoff, unsigned char **data, int dataoff, - TransportAddress *taddr, __be16 port, - struct nf_conntrack_expect *exp); -extern int (*nat_callforwarding_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, int dataoff, - TransportAddress *taddr, - __be16 port, - struct nf_conntrack_expect *exp); -extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, TransportAddress *taddr, - int idx, __be16 port, - struct nf_conntrack_expect *exp); + TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port); + int (*set_sig_addr)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count); + int (*set_ras_addr)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count); + int (*nat_rtp_rtcp)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + __be16 port, __be16 rtp_port, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp); + int (*nat_t120)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp); + int (*nat_h245)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp); + int (*nat_callforwarding)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp); + int (*nat_q931)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, TransportAddress *taddr, int idx, + __be16 port, struct nf_conntrack_expect *exp); +}; +extern const struct nfct_h323_nat_hooks __rcu *nfct_h323_nat_hook; #endif diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 76a411ae9fe6..a334f0dcc2d0 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -579,28 +579,22 @@ static struct nf_ct_helper_expectfn callforwarding_nat = { .expectfn = ip_nat_callforwarding_expect, }; +static const struct nfct_h323_nat_hooks nathooks = { + .set_h245_addr = set_h245_addr, + .set_h225_addr = set_h225_addr, + .set_sig_addr = set_sig_addr, + .set_ras_addr = set_ras_addr, + .nat_rtp_rtcp = nat_rtp_rtcp, + .nat_t120 = nat_t120, + .nat_h245 = nat_h245, + .nat_callforwarding = nat_callforwarding, + .nat_q931 = nat_q931, +}; + /****************************************************************************/ static int __init nf_nat_h323_init(void) { - BUG_ON(set_h245_addr_hook != NULL); - BUG_ON(set_h225_addr_hook != NULL); - BUG_ON(set_sig_addr_hook != NULL); - BUG_ON(set_ras_addr_hook != NULL); - BUG_ON(nat_rtp_rtcp_hook != NULL); - BUG_ON(nat_t120_hook != NULL); - BUG_ON(nat_h245_hook != NULL); - BUG_ON(nat_callforwarding_hook != NULL); - BUG_ON(nat_q931_hook != NULL); - - RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr); - RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr); - RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr); - RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr); - RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp); - RCU_INIT_POINTER(nat_t120_hook, nat_t120); - RCU_INIT_POINTER(nat_h245_hook, nat_h245); - RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding); - RCU_INIT_POINTER(nat_q931_hook, nat_q931); + RCU_INIT_POINTER(nfct_h323_nat_hook, &nathooks); nf_ct_helper_expectfn_register(&q931_nat); nf_ct_helper_expectfn_register(&callforwarding_nat); return 0; @@ -609,15 +603,7 @@ static int __init nf_nat_h323_init(void) /****************************************************************************/ static void __exit nf_nat_h323_fini(void) { - RCU_INIT_POINTER(set_h245_addr_hook, NULL); - RCU_INIT_POINTER(set_h225_addr_hook, NULL); - RCU_INIT_POINTER(set_sig_addr_hook, NULL); - RCU_INIT_POINTER(set_ras_addr_hook, NULL); - RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL); - RCU_INIT_POINTER(nat_t120_hook, NULL); - RCU_INIT_POINTER(nat_h245_hook, NULL); - RCU_INIT_POINTER(nat_callforwarding_hook, NULL); - RCU_INIT_POINTER(nat_q931_hook, NULL); + RCU_INIT_POINTER(nfct_h323_nat_hook, NULL); nf_ct_helper_expectfn_unregister(&q931_nat); nf_ct_helper_expectfn_unregister(&callforwarding_nat); synchronize_rcu(); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 2eb31ffb3d14..bb76305bb7ff 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -49,64 +49,8 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "if both endpoints are on different sides " "(determined by routing information)"); -/* Hooks for NAT */ -int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff, - unsigned char **data, int dataoff, - H245_TransportAddress *taddr, - union nf_inet_addr *addr, __be16 port) - __read_mostly; -int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff, - unsigned char **data, int dataoff, - TransportAddress *taddr, - union nf_inet_addr *addr, __be16 port) - __read_mostly; -int (*set_sig_addr_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - TransportAddress *taddr, int count) __read_mostly; -int (*set_ras_addr_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned char **data, - TransportAddress *taddr, int count) __read_mostly; -int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, int dataoff, - H245_TransportAddress *taddr, - __be16 port, __be16 rtp_port, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp) __read_mostly; -int (*nat_t120_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, int dataoff, - H245_TransportAddress *taddr, __be16 port, - struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_h245_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, int dataoff, - TransportAddress *taddr, __be16 port, - struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_callforwarding_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, int dataoff, - TransportAddress *taddr, __be16 port, - struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_q931_hook) (struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned char **data, TransportAddress *taddr, int idx, - __be16 port, struct nf_conntrack_expect *exp) - __read_mostly; +const struct nfct_h323_nat_hooks __rcu *nfct_h323_nat_hook __read_mostly; +EXPORT_SYMBOL_GPL(nfct_h323_nat_hook); static DEFINE_SPINLOCK(nf_h323_lock); static char *h323_buffer; @@ -259,6 +203,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, int dataoff, H245_TransportAddress *taddr) { + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret = 0; __be16 port; @@ -266,7 +211,6 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, union nf_inet_addr addr; struct nf_conntrack_expect *rtp_exp; struct nf_conntrack_expect *rtcp_exp; - typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp; /* Read RTP or RTCP address */ if (!get_h245_addr(ct, *data, taddr, &addr, &port) || @@ -296,15 +240,16 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_UDP, NULL, &rtcp_port); + nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && - (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && + nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, - taddr, port, rtp_port, rtp_exp, rtcp_exp); + ret = nathook->nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, + taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp, 0) == 0) { if (nf_ct_expect_related(rtcp_exp, 0) == 0) { @@ -333,12 +278,12 @@ static int expect_t120(struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr) { + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret = 0; __be16 port; union nf_inet_addr addr; struct nf_conntrack_expect *exp; - typeof(nat_t120_hook) nat_t120; /* Read T.120 address */ if (!get_h245_addr(ct, *data, taddr, &addr, &port) || @@ -355,15 +300,16 @@ static int expect_t120(struct sk_buff *skb, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple channels */ + nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && - (nat_t120 = rcu_dereference(nat_t120_hook)) && + nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr, - port, exp); + ret = nathook->nat_t120(skb, ct, ctinfo, protoff, data, + dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_h323: expect T.120 "); @@ -664,18 +610,19 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, return 1; } +EXPORT_SYMBOL_GPL(get_h225_addr); static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff, unsigned char **data, int dataoff, TransportAddress *taddr) { + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret = 0; __be16 port; union nf_inet_addr addr; struct nf_conntrack_expect *exp; - typeof(nat_h245_hook) nat_h245; /* Read h245Address */ if (!get_h225_addr(ct, *data, taddr, &addr, &port) || @@ -692,15 +639,16 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, IPPROTO_TCP, NULL, &port); exp->helper = &nf_conntrack_helper_h245; + nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && - (nat_h245 = rcu_dereference(nat_h245_hook)) && + nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr, - port, exp); + ret = nathook->nat_h245(skb, ct, ctinfo, protoff, data, + dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_q931: expect H.245 "); @@ -785,13 +733,13 @@ static int expect_callforwarding(struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr) { + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret = 0; __be16 port; union nf_inet_addr addr; struct nf_conntrack_expect *exp; struct net *net = nf_ct_net(ct); - typeof(nat_callforwarding_hook) nat_callforwarding; /* Read alternativeAddress */ if (!get_h225_addr(ct, *data, taddr, &addr, &port) || port == 0) @@ -815,16 +763,17 @@ static int expect_callforwarding(struct sk_buff *skb, IPPROTO_TCP, NULL, &port); exp->helper = nf_conntrack_helper_q931; + nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(ct->tuplehash[dir].tuple.src.u3)) && - (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && + nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_callforwarding(skb, ct, ctinfo, - protoff, data, dataoff, - taddr, port, exp); + ret = nathook->nat_callforwarding(skb, ct, ctinfo, + protoff, data, dataoff, + taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_q931: expect Call Forwarding "); @@ -844,12 +793,12 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, int dataoff, Setup_UUIE *setup) { + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret; int i; __be16 port; union nf_inet_addr addr; - typeof(set_h225_addr_hook) set_h225_addr; pr_debug("nf_ct_q931: Setup\n"); @@ -860,9 +809,9 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, return -1; } - set_h225_addr = rcu_dereference(set_h225_addr_hook); + nathook = rcu_dereference(nfct_h323_nat_hook); if ((setup->options & eSetup_UUIE_destCallSignalAddress) && - (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK && get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && @@ -870,16 +819,16 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3, ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr(skb, protoff, data, dataoff, - &setup->destCallSignalAddress, - &ct->tuplehash[!dir].tuple.src.u3, - ct->tuplehash[!dir].tuple.src.u.tcp.port); + ret = nathook->set_h225_addr(skb, protoff, data, dataoff, + &setup->destCallSignalAddress, + &ct->tuplehash[!dir].tuple.src.u3, + ct->tuplehash[!dir].tuple.src.u.tcp.port); if (ret < 0) return -1; } if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) && - (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK && get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && @@ -887,10 +836,10 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3, ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr(skb, protoff, data, dataoff, - &setup->sourceCallSignalAddress, - &ct->tuplehash[!dir].tuple.dst.u3, - ct->tuplehash[!dir].tuple.dst.u.tcp.port); + ret = nathook->set_h225_addr(skb, protoff, data, dataoff, + &setup->sourceCallSignalAddress, + &ct->tuplehash[!dir].tuple.dst.u3, + ct->tuplehash[!dir].tuple.dst.u.tcp.port); if (ret < 0) return -1; } @@ -1249,13 +1198,13 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, TransportAddress *taddr, int count) { struct nf_ct_h323_master *info = nfct_help_data(ct); + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret = 0; int i; __be16 port; union nf_inet_addr addr; struct nf_conntrack_expect *exp; - typeof(nat_q931_hook) nat_q931; /* Look for the first related address */ for (i = 0; i < count; i++) { @@ -1279,11 +1228,11 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, exp->helper = nf_conntrack_helper_q931; exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ - nat_q931 = rcu_dereference(nat_q931_hook); - if (nat_q931 && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_q931(skb, ct, ctinfo, protoff, data, - taddr, i, port, exp); + ret = nathook->nat_q931(skb, ct, ctinfo, protoff, data, + taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1305,15 +1254,15 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned char **data, GatekeeperRequest *grq) { - typeof(set_ras_addr_hook) set_ras_addr; + const struct nfct_h323_nat_hooks *nathook; pr_debug("nf_ct_ras: GRQ\n"); - set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) /* NATed */ - return set_ras_addr(skb, ct, ctinfo, protoff, data, - &grq->rasAddress, 1); + return nathook->set_ras_addr(skb, ct, ctinfo, protoff, data, + &grq->rasAddress, 1); return 0; } @@ -1367,8 +1316,8 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, RegistrationRequest *rrq) { struct nf_ct_h323_master *info = nfct_help_data(ct); + const struct nfct_h323_nat_hooks *nathook; int ret; - typeof(set_ras_addr_hook) set_ras_addr; pr_debug("nf_ct_ras: RRQ\n"); @@ -1378,12 +1327,12 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, if (ret < 0) return -1; - set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, protoff, data, - rrq->rasAddress.item, - rrq->rasAddress.count); + ret = nathook->set_ras_addr(skb, ct, ctinfo, protoff, data, + rrq->rasAddress.item, + rrq->rasAddress.count); if (ret < 0) return -1; } @@ -1403,19 +1352,19 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, RegistrationConfirm *rcf) { struct nf_ct_h323_master *info = nfct_help_data(ct); + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret; struct nf_conntrack_expect *exp; - typeof(set_sig_addr_hook) set_sig_addr; pr_debug("nf_ct_ras: RCF\n"); - set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, protoff, data, - rcf->callSignalAddress.item, - rcf->callSignalAddress.count); + ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data, + rcf->callSignalAddress.item, + rcf->callSignalAddress.count); if (ret < 0) return -1; } @@ -1454,18 +1403,18 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, UnregistrationRequest *urq) { struct nf_ct_h323_master *info = nfct_help_data(ct); + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); int ret; - typeof(set_sig_addr_hook) set_sig_addr; pr_debug("nf_ct_ras: URQ\n"); - set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, protoff, data, - urq->callSignalAddress.item, - urq->callSignalAddress.count); + ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data, + urq->callSignalAddress.item, + urq->callSignalAddress.count); if (ret < 0) return -1; } @@ -1487,39 +1436,42 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, AdmissionRequest *arq) { const struct nf_ct_h323_master *info = nfct_help_data(ct); + const struct nfct_h323_nat_hooks *nathook; int dir = CTINFO2DIR(ctinfo); __be16 port; union nf_inet_addr addr; - typeof(set_h225_addr_hook) set_h225_addr; pr_debug("nf_ct_ras: ARQ\n"); - set_h225_addr = rcu_dereference(set_h225_addr_hook); + nathook = rcu_dereference(nfct_h323_nat_hook); + if (!nathook) + return 0; + if ((arq->options & eAdmissionRequest_destCallSignalAddress) && get_h225_addr(ct, *data, &arq->destCallSignalAddress, &addr, &port) && !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && port == info->sig_port[dir] && nf_ct_l3num(ct) == NFPROTO_IPV4 && - set_h225_addr && ct->status & IPS_NAT_MASK) { + ct->status & IPS_NAT_MASK) { /* Answering ARQ */ - return set_h225_addr(skb, protoff, data, 0, - &arq->destCallSignalAddress, - &ct->tuplehash[!dir].tuple.dst.u3, - info->sig_port[!dir]); + return nathook->set_h225_addr(skb, protoff, data, 0, + &arq->destCallSignalAddress, + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir]); } if ((arq->options & eAdmissionRequest_srcCallSignalAddress) && get_h225_addr(ct, *data, &arq->srcCallSignalAddress, &addr, &port) && !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && - set_h225_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { /* Calling ARQ */ - return set_h225_addr(skb, protoff, data, 0, - &arq->srcCallSignalAddress, - &ct->tuplehash[!dir].tuple.dst.u3, - port); + return nathook->set_h225_addr(skb, protoff, data, 0, + &arq->srcCallSignalAddress, + &ct->tuplehash[!dir].tuple.dst.u3, + port); } return 0; @@ -1535,7 +1487,6 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, __be16 port; union nf_inet_addr addr; struct nf_conntrack_expect *exp; - typeof(set_sig_addr_hook) set_sig_addr; pr_debug("nf_ct_ras: ACF\n"); @@ -1544,12 +1495,15 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, return 0; if (!memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3, sizeof(addr))) { + const struct nfct_h323_nat_hooks *nathook; + /* Answering ACF */ - set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - return set_sig_addr(skb, ct, ctinfo, protoff, data, - &acf->destCallSignalAddress, 1); + return nathook->set_sig_addr(skb, ct, ctinfo, protoff, + data, + &acf->destCallSignalAddress, 1); return 0; } @@ -1578,15 +1532,15 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned char **data, LocationRequest *lrq) { - typeof(set_ras_addr_hook) set_ras_addr; + const struct nfct_h323_nat_hooks *nathook; pr_debug("nf_ct_ras: LRQ\n"); - set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) - return set_ras_addr(skb, ct, ctinfo, protoff, data, - &lrq->replyAddress, 1); + return nathook->set_ras_addr(skb, ct, ctinfo, protoff, data, + &lrq->replyAddress, 1); return 0; } @@ -1634,27 +1588,22 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned char **data, InfoRequestResponse *irr) { + const struct nfct_h323_nat_hooks *nathook; int ret; - typeof(set_ras_addr_hook) set_ras_addr; - typeof(set_sig_addr_hook) set_sig_addr; pr_debug("nf_ct_ras: IRR\n"); - set_ras_addr = rcu_dereference(set_ras_addr_hook); - if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && + nathook = rcu_dereference(nfct_h323_nat_hook); + if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, protoff, data, - &irr->rasAddress, 1); + ret = nathook->set_ras_addr(skb, ct, ctinfo, protoff, data, + &irr->rasAddress, 1); if (ret < 0) return -1; - } - set_sig_addr = rcu_dereference(set_sig_addr_hook); - if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 && - ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, protoff, data, - irr->callSignalAddress.item, - irr->callSignalAddress.count); + ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data, + irr->callSignalAddress.item, + irr->callSignalAddress.count); if (ret < 0) return -1; } @@ -1837,17 +1786,6 @@ err1: module_init(nf_conntrack_h323_init); module_exit(nf_conntrack_h323_fini); -EXPORT_SYMBOL_GPL(get_h225_addr); -EXPORT_SYMBOL_GPL(set_h245_addr_hook); -EXPORT_SYMBOL_GPL(set_h225_addr_hook); -EXPORT_SYMBOL_GPL(set_sig_addr_hook); -EXPORT_SYMBOL_GPL(set_ras_addr_hook); -EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook); -EXPORT_SYMBOL_GPL(nat_t120_hook); -EXPORT_SYMBOL_GPL(nat_h245_hook); -EXPORT_SYMBOL_GPL(nat_callforwarding_hook); -EXPORT_SYMBOL_GPL(nat_q931_hook); - MODULE_AUTHOR("Jing Min Zhao "); MODULE_DESCRIPTION("H.323 connection tracking helper"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From f16214c102f0f64b2f3546e989498525bd7b7708 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 11 Jul 2022 10:12:00 +0200 Subject: bpf: Fix 'dubious one-bit signed bitfield' warnings Our CI[1] reported these warnings when using Sparse: $ touch net/mptcp/bpf.c $ make C=1 net/mptcp/bpf.o net/mptcp/bpf.c: note: in included file: include/linux/bpf_verifier.h:348:26: error: dubious one-bit signed bitfield include/linux/bpf_verifier.h:349:29: error: dubious one-bit signed bitfield Set them as 'unsigned' to avoid warnings. [1] https://github.com/multipath-tcp/mptcp_net-next/actions/runs/2643588487 Fixes: 1ade23711971 ("bpf: Inline calls to bpf_loop when callback is known") Signed-off-by: Matthieu Baerts Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220711081200.2081262-1-matthieu.baerts@tessares.net --- include/linux/bpf_verifier.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 81b19669efba..2e3bad8640dc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -345,10 +345,10 @@ struct bpf_verifier_state_list { }; struct bpf_loop_inline_state { - int initialized:1; /* set to true upon first entry */ - int fit_for_inline:1; /* true if callback function is the same - * at each call and flags are always zero - */ + unsigned int initialized:1; /* set to true upon first entry */ + unsigned int fit_for_inline:1; /* true if callback function is the same + * at each call and flags are always zero + */ u32 callback_subprogno; /* valid when fit_for_inline is true */ }; -- cgit v1.2.3 From 7b19119f4c7dc9412b556ea12fae6b32574e2810 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Jul 2022 01:14:06 -0700 Subject: net/mlx5: Use devl_ API in mlx5e_devlink_port_register As part of the flows invoked by mlx5_devlink_eswitch_mode_set() get to mlx5_rescan_drivers_locked() which can call mlx5e_probe()/mlx5e_remove and register/unregister mlx5e driver ports accordingly. This can lead to deadlock once mlx5_devlink_eswitch_mode_set() will use devlink lock. Use devl_port_register/unregister() instead of devlink_port_register/unregister() and add devlink instance locks in the driver paths to this function to have it locked while calling devl_ API function. If remove or probe were called by module init or module cleanup flows, need to lock devlink just before calling devl_port_register(), otherwise it is called by attach/detach or register/unregister flow and we can have the flow locked. Added flag to distinguish between these cases. This will be used by the downstream patch to invoke mlx5_devlink_eswitch_mode_set() with devlink locked. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 29 ++++++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en/devlink.c | 16 ++++++++++-- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 ++ include/linux/mlx5/driver.h | 4 +++ 4 files changed, 47 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 50422b56a64d..ccf2068d2e79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -335,13 +335,16 @@ static void del_adev(struct auxiliary_device *adev) int mlx5_attach_device(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; struct auxiliary_driver *adrv; int ret = 0, i; + devl_lock(devlink); mutex_lock(&mlx5_intf_mutex); priv->flags &= ~MLX5_PRIV_FLAGS_DETACH; + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { if (!priv->adev[i]) { bool is_supported = false; @@ -389,19 +392,24 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) break; } } + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; mutex_unlock(&mlx5_intf_mutex); + devl_unlock(devlink); return ret; } void mlx5_detach_device(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; struct auxiliary_driver *adrv; pm_message_t pm = {}; int i; + devl_lock(devlink); mutex_lock(&mlx5_intf_mutex); + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { if (!priv->adev[i]) continue; @@ -430,18 +438,24 @@ skip_suspend: del_adev(&priv->adev[i]->adev); priv->adev[i] = NULL; } + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; priv->flags |= MLX5_PRIV_FLAGS_DETACH; mutex_unlock(&mlx5_intf_mutex); + devl_unlock(devlink); } int mlx5_register_device(struct mlx5_core_dev *dev) { + struct devlink *devlink; int ret; + devlink = priv_to_devlink(dev); + devl_lock(devlink); mutex_lock(&mlx5_intf_mutex); dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; ret = mlx5_rescan_drivers_locked(dev); mutex_unlock(&mlx5_intf_mutex); + devl_unlock(devlink); if (ret) mlx5_unregister_device(dev); @@ -450,10 +464,15 @@ int mlx5_register_device(struct mlx5_core_dev *dev) void mlx5_unregister_device(struct mlx5_core_dev *dev) { + struct devlink *devlink; + + devlink = priv_to_devlink(dev); + devl_lock(devlink); mutex_lock(&mlx5_intf_mutex); dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; mlx5_rescan_drivers_locked(dev); mutex_unlock(&mlx5_intf_mutex); + devl_unlock(devlink); } static int add_drivers(struct mlx5_core_dev *dev) @@ -526,16 +545,22 @@ del_adev: int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; + int err = 0; lockdep_assert_held(&mlx5_intf_mutex); if (priv->flags & MLX5_PRIV_FLAGS_DETACH) return 0; + priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; delete_drivers(dev); if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) - return 0; + goto out; + + err = add_drivers(dev); - return add_drivers(dev); +out: + priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; + return err; } bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index ae52e7f38306..b69f9d10ccbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -21,6 +21,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv) struct netdev_phys_item_id ppid = {}; struct devlink_port *dl_port; unsigned int dl_port_index; + int ret; if (mlx5_core_is_pf(priv->mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; @@ -41,7 +42,13 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv) memset(dl_port, 0, sizeof(*dl_port)); devlink_port_attrs_set(dl_port, &attrs); - return devlink_port_register(devlink, dl_port, dl_port_index); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_lock(devlink); + ret = devl_port_register(devlink, dl_port, dl_port_index); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_unlock(devlink); + + return ret; } void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) @@ -54,8 +61,13 @@ void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) { struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + struct devlink *devlink = priv_to_devlink(priv->mdev); - devlink_port_unregister(dl_port); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_lock(devlink); + devl_port_unregister(dl_port); + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) + devl_unlock(devlink); } struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1bfbc88f513f..ccda3a0a2594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3400,7 +3400,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = esw_offloads_start(esw, extack); } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { err = esw_offloads_stop(esw, extack); + devl_lock(devlink); mlx5_rescan_drivers(esw->dev); + devl_unlock(devlink); } else { err = -EINVAL; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 76d7661e3e63..bd882884b23c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -551,6 +551,10 @@ enum { * creation/deletion on drivers rescan. Unset during device attach. */ MLX5_PRIV_FLAGS_DETACH = 1 << 2, + /* Distinguish between mlx5e_probe/remove called by module init/cleanup + * and called by other flows which can already hold devlink lock + */ + MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW = 1 << 3, }; struct mlx5_adev { -- cgit v1.2.3 From 4201d9ab3e42d9e2a20320b751a931e6239c0df2 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 11 Jul 2022 09:28:27 -0700 Subject: bpf: reparent bpf maps on memcg offlining The memory consumed by a bpf map is always accounted to the memory cgroup of the process which created the map. The map can outlive the memory cgroup if it's used by processes in other cgroups or is pinned on bpffs. In this case the map pins the original cgroup in the dying state. For other types of objects (slab objects, non-slab kernel allocations, percpu objects and recently LRU pages) there is a reparenting process implemented: on cgroup offlining charged objects are getting reassigned to the parent cgroup. Because all charges and statistics are fully recursive it's a fairly cheap operation. For efficiency and consistency with other types of objects, let's do the same for bpf maps. Fortunately thanks to the objcg API, the required changes are minimal. Please, note that individual allocations (slabs, percpu and large kmallocs) already have the reparenting mechanism. This commit adds it to the saved map->memcg pointer by replacing it to map->objcg. Because dying cgroups are not visible for a user and all charges are recursive, this commit doesn't bring any behavior changes for a user. v2: added a missing const qualifier Signed-off-by: Roman Gushchin Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20220711162827.184743-1-roman.gushchin@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b21f2a3452f..85a4db3e0536 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -221,7 +221,7 @@ struct bpf_map { u32 btf_vmlinux_value_type_id; struct btf *btf; #ifdef CONFIG_MEMCG_KMEM - struct mem_cgroup *memcg; + struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; struct bpf_map_off_arr *off_arr; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ab688d85b2c6..83c7136c5788 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -419,35 +419,53 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) #ifdef CONFIG_MEMCG_KMEM static void bpf_map_save_memcg(struct bpf_map *map) { - map->memcg = get_mem_cgroup_from_mm(current->mm); + /* Currently if a map is created by a process belonging to the root + * memory cgroup, get_obj_cgroup_from_current() will return NULL. + * So we have to check map->objcg for being NULL each time it's + * being used. + */ + map->objcg = get_obj_cgroup_from_current(); } static void bpf_map_release_memcg(struct bpf_map *map) { - mem_cgroup_put(map->memcg); + if (map->objcg) + obj_cgroup_put(map->objcg); +} + +static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map) +{ + if (map->objcg) + return get_mem_cgroup_from_objcg(map->objcg); + + return root_mem_cgroup; } void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node) { - struct mem_cgroup *old_memcg; + struct mem_cgroup *memcg, *old_memcg; void *ptr; - old_memcg = set_active_memcg(map->memcg); + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); set_active_memcg(old_memcg); + mem_cgroup_put(memcg); return ptr; } void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) { - struct mem_cgroup *old_memcg; + struct mem_cgroup *memcg, *old_memcg; void *ptr; - old_memcg = set_active_memcg(map->memcg); + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); ptr = kzalloc(size, flags | __GFP_ACCOUNT); set_active_memcg(old_memcg); + mem_cgroup_put(memcg); return ptr; } @@ -455,12 +473,14 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags) { - struct mem_cgroup *old_memcg; + struct mem_cgroup *memcg, *old_memcg; void __percpu *ptr; - old_memcg = set_active_memcg(map->memcg); + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); set_active_memcg(old_memcg); + mem_cgroup_put(memcg); return ptr; } -- cgit v1.2.3 From 1d5f82d9dd477d5c66e0214a68c3e4f308eadd6d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 5 Jul 2022 17:26:12 -0700 Subject: bpf, x86: fix freeing of not-finalized bpf_prog_pack syzbot reported a few issues with bpf_prog_pack [1], [2]. This only happens with multiple subprogs. In jit_subprogs(), we first call bpf_int_jit_compile() on each sub program. And then, we call it on each sub program again. jit_data is not freed in the first call of bpf_int_jit_compile(). Similarly we don't call bpf_jit_binary_pack_finalize() in the first call of bpf_int_jit_compile(). If bpf_int_jit_compile() failed for one sub program, we will call bpf_jit_binary_pack_finalize() for this sub program. However, we don't have a chance to call it for other sub programs. Then we will hit "goto out_free" in jit_subprogs(), and call bpf_jit_free on some subprograms that haven't got bpf_jit_binary_pack_finalize() yet. At this point, bpf_jit_binary_pack_free() is called and the whole 2MB page is freed erroneously. Fix this with a custom bpf_jit_free() for x86_64, which calls bpf_jit_binary_pack_finalize() if necessary. Also, with custom bpf_jit_free(), bpf_prog_aux->use_bpf_prog_pack is not needed any more, remove it. Fixes: 1022a5498f6f ("bpf, x86_64: Use bpf_jit_binary_pack_alloc") [1] https://syzkaller.appspot.com/bug?extid=2f649ec6d2eea1495a8f [2] https://syzkaller.appspot.com/bug?extid=87f65c75f4a72db05445 Reported-by: syzbot+2f649ec6d2eea1495a8f@syzkaller.appspotmail.com Reported-by: syzbot+87f65c75f4a72db05445@syzkaller.appspotmail.com Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20220706002612.4013790-1-song@kernel.org Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 25 +++++++++++++++++++++++++ include/linux/bpf.h | 1 - include/linux/filter.h | 8 ++++++++ kernel/bpf/core.c | 29 ++++++++++++----------------- 4 files changed, 45 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d2614f1bf838..54c7f46c453f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2486,3 +2486,28 @@ bool bpf_jit_supports_subprog_tailcalls(void) { return true; } + +void bpf_jit_free(struct bpf_prog *prog) +{ + if (prog->jited) { + struct x64_jit_data *jit_data = prog->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), + * the program may not be finalized yet. Call finalize here + * before freeing it. + */ + if (jit_data) { + bpf_jit_binary_pack_finalize(prog, jit_data->header, + jit_data->rw_header); + kvfree(jit_data->addrs); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(prog); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); + } + + bpf_prog_unlock_free(prog); +} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 85a4db3e0536..a5bf00649995 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1044,7 +1044,6 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; - bool use_bpf_prog_pack; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 4c1a8b247545..a5f21dc3c432 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1027,6 +1027,14 @@ u64 bpf_jit_alloc_exec_limit(void); void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); + +static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +{ + return list_empty(&fp->aux->ksym.lnode) || + fp->aux->ksym.lnode.prev == LIST_POISON2; +} struct bpf_binary_header * bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 805c2ad5c793..cfb8a50a9f12 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -650,12 +650,6 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) return fp->jited && !bpf_prog_was_classic(fp); } -static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) -{ - return list_empty(&fp->aux->ksym.lnode) || - fp->aux->ksym.lnode.prev == LIST_POISON2; -} - void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || @@ -1153,7 +1147,6 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, bpf_prog_pack_free(ro_header); return PTR_ERR(ptr); } - prog->aux->use_bpf_prog_pack = true; return 0; } @@ -1177,17 +1170,23 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, bpf_jit_uncharge_modmem(size); } +struct bpf_binary_header * +bpf_jit_binary_pack_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr; + + addr = real_start & BPF_PROG_CHUNK_MASK; + return (void *)addr; +} + static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) { unsigned long real_start = (unsigned long)fp->bpf_func; unsigned long addr; - if (fp->aux->use_bpf_prog_pack) - addr = real_start & BPF_PROG_CHUNK_MASK; - else - addr = real_start & PAGE_MASK; - + addr = real_start & PAGE_MASK; return (void *)addr; } @@ -1200,11 +1199,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp) if (fp->jited) { struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - if (fp->aux->use_bpf_prog_pack) - bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */); - else - bpf_jit_binary_free(hdr); - + bpf_jit_binary_free(hdr); WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); } -- cgit v1.2.3 From 0372c546eca575445331c0ad8902210b70be6d61 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 2 Jun 2022 12:41:00 +0300 Subject: net/mlx5: Introduce ifc bits for using software vhca id Introduce ifc related stuff to enable using software vhca id functionality. Signed-off-by: Yishai Hadas Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8e87eb47f9dc..254cc22f5eec 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1826,7 +1826,14 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 max_reformat_remove_size[0x8]; u8 max_reformat_remove_offset[0x8]; - u8 reserved_at_c0[0x740]; + u8 reserved_at_c0[0x160]; + + u8 reserved_at_220[0x1]; + u8 sw_vhca_id_valid[0x1]; + u8 sw_vhca_id[0xe]; + u8 reserved_at_230[0x10]; + + u8 reserved_at_240[0x5c0]; }; enum mlx5_ifc_flow_destination_type { @@ -3782,6 +3789,11 @@ struct mlx5_ifc_rmpc_bits { struct mlx5_ifc_wq_bits wq; }; +enum { + VHCA_ID_TYPE_HW = 0, + VHCA_ID_TYPE_SW = 1, +}; + struct mlx5_ifc_nic_vport_context_bits { u8 reserved_at_0[0x5]; u8 min_wqe_inline_mode[0x3]; @@ -3798,8 +3810,8 @@ struct mlx5_ifc_nic_vport_context_bits { u8 event_on_mc_address_change[0x1]; u8 event_on_uc_address_change[0x1]; - u8 reserved_at_40[0xc]; - + u8 vhca_id_type[0x1]; + u8 reserved_at_41[0xb]; u8 affiliation_criteria[0x4]; u8 affiliated_vhca_id[0x10]; @@ -7259,7 +7271,12 @@ struct mlx5_ifc_init_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x2]; + u8 sw_vhca_id[0xe]; + u8 reserved_at_70[0x10]; + u8 sw_owner_id[4][0x20]; }; -- cgit v1.2.3 From dc402ccc0d7b55922a79505df3000da7deb77a2b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 2 Jun 2022 12:47:34 +0300 Subject: net/mlx5: Use software VHCA id when it's supported Use software VHCA id when it's supported by the firmware. A unique id is allocated upon mlx5_mdev_init() and freed upon mlx5_mdev_uninit(), as such it stays the same during the full life cycle of the device including upon health recovery if occurred. The conjunction of sw_vhca_id with sw_owner_id will be a global unique id per function which uses mlx5_core. The sw_vhca_id is set upon init_hca command and is used to specify the VHCA that the NIC vport is affiliated with. This functionality is needed upon migration of VM which is MPV based. (i.e. multi port device). Signed-off-by: Yishai Hadas Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 4 ++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 49 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 14 +++++-- include/linux/mlx5/driver.h | 1 + 4 files changed, 65 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index cfb8bedba512..079fa44ada71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -289,6 +289,10 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) sw_owner_id[i]); } + if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && + dev->priv.sw_vhca_id > 0) + MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id); + return mlx5_cmd_exec_in(dev, init_hca, in); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a9e51c1b7738..8b621c1ddd14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -90,6 +90,8 @@ module_param_named(prof_sel, prof_sel, uint, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); static u32 sw_owner_id[4]; +#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) +static DEFINE_IDA(sw_vhca_ida); enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, @@ -492,6 +494,31 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); + if (err) + return err; + + if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) || + !(dev->priv.sw_vhca_id > 0)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, + MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); + MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); + + return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2); +} + static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { struct mlx5_profile *prof = &dev->profile; @@ -662,6 +689,13 @@ static int set_hca_cap(struct mlx5_core_dev *dev) goto out; } + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_2(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); + goto out; + } + out: kfree(set_ctx); return err; @@ -1506,6 +1540,18 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) if (err) goto err_hca_caps; + /* The conjunction of sw_vhca_id with sw_owner_id will be a global + * unique id per function which uses mlx5_core. + * Those values are supplied to FW as part of the init HCA command to + * be used by both driver and FW when it's applicable. + */ + dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, + MAX_SW_VHCA_ID, + GFP_KERNEL); + if (dev->priv.sw_vhca_id < 0) + mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", + dev->priv.sw_vhca_id); + return 0; err_hca_caps: @@ -1530,6 +1576,9 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; + if (priv->sw_vhca_id > 0) + ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); + mlx5_hca_caps_free(dev); mlx5_adev_cleanup(dev); mlx5_pagealloc_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index ac020cb78072..d5c317325030 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1086,9 +1086,17 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, goto free; MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); - MLX5_SET(modify_nic_vport_context_in, in, - nic_vport_context.affiliated_vhca_id, - MLX5_CAP_GEN(master_mdev, vhca_id)); + if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN_2(master_mdev, sw_vhca_id)); + } else { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN(master_mdev, vhca_id)); + } MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.affiliation_criteria, MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bd882884b23c..ecda6e63d5f2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -610,6 +610,7 @@ struct mlx5_priv { spinlock_t ctx_lock; struct mlx5_adev **adev; int adev_idx; + int sw_vhca_id; struct mlx5_events *events; struct mlx5_flow_steering *steering; -- cgit v1.2.3 From 1e0b3b0b6cb5e04bcb25fbe4164180d64e3ace07 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 27 Apr 2022 18:02:10 +0300 Subject: wifi: mac80211: Align with Draft P802.11be_D1.5 Align the mac80211 implementation with P802.11be_D1.5. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 52 ++++++++++++++++++--------- net/mac80211/ieee80211_i.h | 4 +++ net/mac80211/mlme.c | 32 +++++++++++++++++ net/mac80211/util.c | 87 +++++++++++++++++++++++++++++----------------- 4 files changed, 128 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f386f9ed41f3..e75c73ca11ec 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2046,25 +2046,38 @@ struct ieee80211_eht_cap_elem { u8 optional[]; } __packed; +#define IEEE80211_EHT_OPER_INFO_PRESENT 0x1 +#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x2 + /** * struct ieee80211_eht_operation - eht operation element * * This structure is the "EHT Operation Element" fields as - * described in P802.11be_D1.4 section 9.4.2.311 + * described in P802.11be_D1.5 section 9.4.2.311 * - * FIXME: The spec is unclear how big the fields are, and doesn't - * indicate the "Disabled Subchannel Bitmap Present" in the - * structure (Figure 9-1002a) at all ... + * @params: EHT operation element parameters. See &IEEE80211_EHT_OPER_* + * @optional: optional parts */ struct ieee80211_eht_operation { - u8 chan_width; - u8 ccfs; - u8 present_bm; - - u8 disable_subchannel_bitmap[]; + u8 params; + u8 optional[]; } __packed; -#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x1 +/** + * struct ieee80211_eht_operation_info - eht operation information + * + * @control: EHT operation information control. + * @ccfs0: defines a channel center frequency for a 20, 40, 80, 160, or 320 MHz + * EHT BSS. + * @ccfs1: defines a channel center frequency for a 160 or 320 MHz EHT BSS. + * @optional: optional parts + */ +struct ieee80211_eht_operation_info { + u8 control; + u8 ccfs0; + u8 ccfs1; + u8 optional[]; +} __packed; /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 @@ -2773,10 +2786,12 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 0x08 #define IEEE80211_EHT_MAC_CAP0_RESTRICTED_TWT 0x10 #define IEEE80211_EHT_MAC_CAP0_SCS_TRAFFIC_DESC 0x20 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_MASK 0xc0 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_3895 0 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_7991 1 -#define IEEE80211_EHT_MAC_CAP0_MAX_AMPDU_LEN_11454 2 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK 0xc0 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895 0 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991 1 +#define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454 2 + +#define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK 0x01 /* EHT PHY capabilities as defined in P802.11be_D1.4 section 9.4.2.313.3 */ #define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 @@ -2949,8 +2964,13 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) if (len < needed) return false; - if (elem->present_bm & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) - needed += 2; + if (elem->params & IEEE80211_EHT_OPER_INFO_PRESENT) { + needed += 3; + + if (elem->params & + IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) + needed += 2; + } return len >= needed; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f6791b47f78f..aa1e438ee61e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2326,6 +2326,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_vht_operation *oper, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef); +void ieee80211_chandef_eht_oper(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_eht_operation *eht_oper, + bool support_160, bool support_320, + struct cfg80211_chan_def *chandef); bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, const struct ieee80211_he_operation *he_oper, const struct ieee80211_eht_operation *eht_oper, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e5c058db451d..39f13f3c29bf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -303,6 +303,38 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, *chandef = vht_chandef; + /* + * handle the case that the EHT operation indicates that it holds EHT + * operation information (in case that the channel width differs from + * the channel width reported in HT/VHT/HE). + */ + if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { + struct cfg80211_chan_def eht_chandef = *chandef; + + ieee80211_chandef_eht_oper(sdata, eht_oper, + eht_chandef.width == + NL80211_CHAN_WIDTH_160, + false, &eht_chandef); + + if (!cfg80211_chandef_valid(&eht_chandef)) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_EHT)) + sdata_info(sdata, + "AP EHT information is invalid, disabling EHT\n"); + ret = IEEE80211_STA_DISABLE_EHT; + goto out; + } + + if (!cfg80211_chandef_compatible(chandef, &eht_chandef)) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_EHT)) + sdata_info(sdata, + "AP EHT information is incompatible, disabling EHT\n"); + ret = IEEE80211_STA_DISABLE_EHT; + goto out; + } + + *chandef = eht_chandef; + } + ret = 0; out: diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3e29ef1f81ad..924192238042 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3459,6 +3459,58 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, return true; } +void ieee80211_chandef_eht_oper(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_eht_operation *eht_oper, + bool support_160, bool support_320, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional; + + chandef->center_freq1 = + ieee80211_channel_to_frequency(info->ccfs0, + chandef->chan->band); + + switch (u8_get_bits(info->control, + IEEE80211_EHT_OPER_CHAN_WIDTH)) { + case IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ: + chandef->width = NL80211_CHAN_WIDTH_20; + break; + case IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ: + chandef->width = NL80211_CHAN_WIDTH_40; + break; + case IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ: + chandef->width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ: + if (support_160) { + chandef->width = NL80211_CHAN_WIDTH_160; + chandef->center_freq1 = + ieee80211_channel_to_frequency(info->ccfs1, + chandef->chan->band); + } else { + chandef->width = NL80211_CHAN_WIDTH_80; + } + break; + case IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ: + if (support_320) { + chandef->width = NL80211_CHAN_WIDTH_320; + chandef->center_freq1 = + ieee80211_channel_to_frequency(info->ccfs1, + chandef->chan->band); + } else if (support_160) { + chandef->width = NL80211_CHAN_WIDTH_160; + } else { + chandef->width = NL80211_CHAN_WIDTH_80; + + if (chandef->center_freq1 > chandef->chan->center_freq) + chandef->center_freq1 -= 40; + else + chandef->center_freq1 += 40; + } + break; + } +} + bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, const struct ieee80211_he_operation *he_oper, const struct ieee80211_eht_operation *eht_oper, @@ -3539,7 +3591,8 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, break; } - if (!eht_oper) { + if (!eht_oper || + !(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { switch (u8_get_bits(he_6ghz_oper->control, IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) { case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ: @@ -3583,36 +3636,8 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, support_320 = eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; - switch (u8_get_bits(eht_oper->chan_width, - IEEE80211_EHT_OPER_CHAN_WIDTH)) { - case IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ: - he_chandef.width = NL80211_CHAN_WIDTH_20; - break; - case IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ: - he_chandef.width = NL80211_CHAN_WIDTH_40; - break; - case IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ: - he_chandef.width = NL80211_CHAN_WIDTH_80; - break; - case IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ: - if (support_160) - he_chandef.width = NL80211_CHAN_WIDTH_160; - else - he_chandef.width = NL80211_CHAN_WIDTH_80; - break; - case IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ: - if (support_320) - he_chandef.width = NL80211_CHAN_WIDTH_320; - else if (support_160) - he_chandef.width = NL80211_CHAN_WIDTH_160; - else - he_chandef.width = NL80211_CHAN_WIDTH_80; - break; - } - - he_chandef.center_freq1 = - ieee80211_channel_to_frequency(eht_oper->ccfs, - NL80211_BAND_6GHZ); + ieee80211_chandef_eht_oper(sdata, eht_oper, support_160, + support_320, &he_chandef); } if (!cfg80211_chandef_valid(&he_chandef)) { -- cgit v1.2.3 From 062e8e02dfd43c94b0d601c071e8a5c50bed830e Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 1 Jun 2022 17:43:34 +0300 Subject: wifi: mac80211: Align with Draft P802.11be_D2.0 Align the mac80211 implementation with P802.11be_D2.0. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 6 +++--- include/linux/ieee80211.h | 23 +++++++++++++++-------- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 737f8ed56d94..f437d8a65268 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3118,7 +3118,7 @@ static const struct ieee80211_sband_iftype_data sband_capa_2ghz[] = { .has_eht = true, .eht_cap_elem = { .mac_cap_info[0] = - IEEE80211_EHT_MAC_CAP0_NSEP_PRIO_ACCESS | + IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | IEEE80211_EHT_MAC_CAP0_OM_CONTROL | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1, .phy_cap_info[0] = @@ -3271,7 +3271,7 @@ static const struct ieee80211_sband_iftype_data sband_capa_5ghz[] = { .has_eht = true, .eht_cap_elem = { .mac_cap_info[0] = - IEEE80211_EHT_MAC_CAP0_NSEP_PRIO_ACCESS | + IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | IEEE80211_EHT_MAC_CAP0_OM_CONTROL | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1, .phy_cap_info[0] = @@ -3453,7 +3453,7 @@ static const struct ieee80211_sband_iftype_data sband_capa_6ghz[] = { .has_eht = true, .eht_cap_elem = { .mac_cap_info[0] = - IEEE80211_EHT_MAC_CAP0_NSEP_PRIO_ACCESS | + IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | IEEE80211_EHT_MAC_CAP0_OM_CONTROL | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1, .phy_cap_info[0] = diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e75c73ca11ec..cca564372d16 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2020,7 +2020,7 @@ struct ieee80211_eht_mcs_nss_supp_bw { * struct ieee80211_eht_cap_elem_fixed - EHT capabilities fixed data * * This structure is the "EHT Capabilities element" fixed fields as - * described in P802.11be_D1.4 section 9.4.2.313. + * described in P802.11be_D2.0 section 9.4.2.313. * * @mac_cap_info: MAC capabilities, see IEEE80211_EHT_MAC_CAP* * @phy_cap_info: PHY capabilities, see IEEE80211_EHT_PHY_CAP* @@ -2046,20 +2046,27 @@ struct ieee80211_eht_cap_elem { u8 optional[]; } __packed; -#define IEEE80211_EHT_OPER_INFO_PRESENT 0x1 -#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x2 +#define IEEE80211_EHT_OPER_INFO_PRESENT 0x01 +#define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x02 +#define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION 0x04 +#define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT 0x08 +#define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK 0x30 /** * struct ieee80211_eht_operation - eht operation element * * This structure is the "EHT Operation Element" fields as - * described in P802.11be_D1.5 section 9.4.2.311 + * described in P802.11be_D2.0 section 9.4.2.311 * * @params: EHT operation element parameters. See &IEEE80211_EHT_OPER_* + * @basic_mcs_nss: indicates the EHT-MCSs for each number of spatial streams in + * EHT PPDUs that are supported by all EHT STAs in the BSS in transmit and + * receive. * @optional: optional parts */ struct ieee80211_eht_operation { u8 params; + __le32 basic_mcs_nss; u8 optional[]; } __packed; @@ -2779,8 +2786,8 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) -/* EHT MAC capabilities as defined in P802.11be_D1.4 section 9.4.2.313.2 */ -#define IEEE80211_EHT_MAC_CAP0_NSEP_PRIO_ACCESS 0x01 +/* EHT MAC capabilities as defined in P802.11be_D2.0 section 9.4.2.313.2 */ +#define IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS 0x01 #define IEEE80211_EHT_MAC_CAP0_OM_CONTROL 0x02 #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 0x04 #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 0x08 @@ -2793,7 +2800,7 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK 0x01 -/* EHT PHY capabilities as defined in P802.11be_D1.4 section 9.4.2.313.3 */ +/* EHT PHY capabilities as defined in P802.11be_D2.0 section 9.4.2.313.3 */ #define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 #define IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ 0x04 #define IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI 0x08 @@ -2858,7 +2865,7 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA 0x02 /* - * EHT operation channel width as defined in P802.11be_D1.4 section 9.4.2.311 + * EHT operation channel width as defined in P802.11be_D2.0 section 9.4.2.311 */ #define IEEE80211_EHT_OPER_CHAN_WIDTH 0x7 #define IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ 0 -- cgit v1.2.3 From e68c5dcf0aacc48a23cedcb3ce81b8c60837f48c Mon Sep 17 00:00:00 2001 From: Jaehee Park Date: Wed, 13 Jul 2022 16:40:47 -0700 Subject: net: ipv4: new arp_accept option to accept garp only if in-network In many deployments, we want the option to not learn a neighbor from garp if the src ip is not in the same subnet as an address configured on the interface that received the garp message. net.ipv4.arp_accept sysctl is currently used to control creation of a neigh from a received garp packet. This patch adds a new option '2' to net.ipv4.arp_accept which extends option '1' by including the subnet check. Signed-off-by: Jaehee Park Suggested-by: Roopa Prabhu Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 9 ++++++--- include/linux/inetdevice.h | 2 +- net/ipv4/arp.c | 24 ++++++++++++++++++++++-- 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 2b329042b38c..b31601405c54 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1633,12 +1633,15 @@ arp_notify - BOOLEAN or hardware address changes. == ========================================================== -arp_accept - BOOLEAN - Define behavior for gratuitous ARP frames who's IP is not - already present in the ARP table: +arp_accept - INTEGER + Define behavior for accepting gratuitous ARP (garp) frames from devices + that are not already present in the ARP table: - 0 - don't create new entries in the ARP table - 1 - create new entries in the ARP table + - 2 - create new entries only if the source IP address is in the same + subnet as an address configured on the interface that received the + garp message. Both replies and requests type gratuitous arp will trigger the ARP table to be updated, if this setting is on. diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ead323243e7b..ddb27fc0ee8c 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -131,7 +131,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) IN_DEV_ORCONF((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) -#define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) +#define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index af2f12ffc9ca..87c7e3fc5197 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -429,6 +429,26 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) return !inet_confirm_addr(net, in_dev, sip, tip, scope); } +static int arp_accept(struct in_device *in_dev, __be32 sip) +{ + struct net *net = dev_net(in_dev->dev); + int scope = RT_SCOPE_LINK; + + switch (IN_DEV_ARP_ACCEPT(in_dev)) { + case 0: /* Don't create new entries from garp */ + return 0; + case 1: /* Create new entries from garp */ + return 1; + case 2: /* Create a neighbor in the arp table only if sip + * is in the same subnet as an address configured + * on the interface that received the garp message + */ + return !!inet_confirm_addr(net, in_dev, sip, 0, scope); + default: + return 0; + } +} + static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { struct rtable *rt; @@ -868,12 +888,12 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); addr_type = -1; - if (n || IN_DEV_ARP_ACCEPT(in_dev)) { + if (n || arp_accept(in_dev, sip)) { is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, sip, tip, sha, tha); } - if (IN_DEV_ARP_ACCEPT(in_dev)) { + if (arp_accept(in_dev, sip)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) -- cgit v1.2.3 From fd1894224407c484f652ad456e1ce423e89bb3eb Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 15 Jul 2022 19:55:59 +0800 Subject: bpf: Don't redirect packets with invalid pkt_len Syzbot found an issue [1]: fq_codel_drop() try to drop a flow whitout any skbs, that is, the flow->head is null. The root cause, as the [2] says, is because that bpf_prog_test_run_skb() run a bpf prog which redirects empty skbs. So we should determine whether the length of the packet modified by bpf prog or others like bpf_prog_test is valid before forwarding it directly. LINK: [1] https://syzkaller.appspot.com/bug?id=0b84da80c2917757915afa89f7738a9d16ec96c5 LINK: [2] https://www.spinics.net/lists/netdev/msg777503.html Reported-by: syzbot+7a12909485b94426aceb@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220715115559.139691-1-shaozhengchao@huawei.com Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 8 ++++++++ net/bpf/test_run.c | 3 +++ net/core/dev.c | 1 + 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f6a27ab19202..82e8368ba6e6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2459,6 +2459,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +static inline void skb_assert_len(struct sk_buff *skb) +{ +#ifdef CONFIG_DEBUG_NET + if (WARN_ONCE(!skb->len, "%s\n", __func__)) + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); +#endif /* CONFIG_DEBUG_NET */ +} + /* * Add data to an sk_buff */ diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2ca96acbc50a..dc9dc0bedca0 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -955,6 +955,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + if (!skb->len) + return -EINVAL; + if (!__skb) return 0; diff --git a/net/core/dev.c b/net/core/dev.c index 978ed0622d8f..e241a475036f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4168,6 +4168,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) bool again = false; skb_reset_mac_header(skb); + skb_assert_len(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); -- cgit v1.2.3 From 2e5e4185ff89ea0fc44c9dead8dccf306a3b3bbb Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 4 Jul 2022 19:34:08 +0300 Subject: net/mlx5: Expose ts_cqe_metadata_size2wqe_counter Add capability field which indicates the mask for wqe_counter which connects between loopback CQE and the original WQE. With this connection the driver can identify lost of the loopback CQE and reply PTP synchronization with timestamp given in the original CQE. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 254cc22f5eec..51b4e71017ee 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1833,7 +1833,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 sw_vhca_id[0xe]; u8 reserved_at_230[0x10]; - u8 reserved_at_240[0x5c0]; + u8 reserved_at_240[0xb]; + u8 ts_cqe_metadata_size2wqe_counter[0x5]; + u8 reserved_at_250[0x10]; + + u8 reserved_at_260[0x5a0]; }; enum mlx5_ifc_flow_destination_type { -- cgit v1.2.3 From 9cb61fda8c71baaff423fe5be2e609100860fa78 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 20 Jul 2022 08:52:20 -0700 Subject: bpf: Fix bpf_trampoline_{,un}link_cgroup_shim ifdef guards They were updated in kernel/bpf/trampoline.c to fix another build issue. We should to do the same for include/linux/bpf.h header. Fixes: 3908fcddc65d ("bpf: fix lsm_cgroup build errors on esoteric configs") Reported-by: Stephen Rothwell Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220720155220.4087433-1-sdf@google.com --- include/linux/bpf.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a5bf00649995..11950029284f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1254,9 +1254,6 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif -int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype); -void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { @@ -1280,6 +1277,13 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +#endif + +#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + int cgroup_atype); +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); +#else static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, int cgroup_atype) { -- cgit v1.2.3 From 5588d628027092e66195097bdf6835ddf64418b3 Mon Sep 17 00:00:00 2001 From: Łukasz Spintzyk Date: Wed, 20 Jul 2022 08:05:18 +0200 Subject: net/cdc_ncm: Increase NTB max RX/TX values to 64kb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DisplayLink ethernet devices require NTB buffers larger then 32kb in order to run with highest performance. This patch is changing upper limit of the rx and tx buffers. Those buffers are initialized with CDC_NCM_NTB_DEF_SIZE_RX and CDC_NCM_NTB_DEF_SIZE_TX which is 16kb so by default no device is affected by increased limit. Rx and tx buffer is increased under two conditions: - Device need to advertise that it supports higher buffer size in dwNtbMaxInMaxSize and dwNtbMaxOutMaxSize. - cdc_ncm/rx_max and cdc_ncm/tx_max driver parameters must be adjusted with udev rule or ethtool. Summary of testing and performance results: Tests were performed on following devices: - DisplayLink DL-3xxx family device - DisplayLink DL-6xxx family device - ASUS USB-C2500 2.5G USB3 ethernet adapter - Plugable USB3 1G USB3 ethernet adapter - EDIMAX EU-4307 USB-C ethernet adapter - Dell DBQBCBC064 USB-C ethernet adapter Performance measurements were done with: - iperf3 between two linux boxes - http://openspeedtest.com/ instance running on local test machine Insights from tests results: - All except one from third party usb adapters were not affected by increased buffer size to their advertised dwNtbOutMaxSize and dwNtbInMaxSize. Devices were generally reaching 912-940Mbps both download and upload. Only EDIMAX adapter experienced decreased download size from 929Mbps to 827Mbps with iper3, with openspeedtest decrease was from 968Mbps to 886Mbps. - DisplayLink DL-3xxx family devices experienced performance increase with iperf3 download from 300Mbps to 870Mbps and upload from 782Mbps to 844Mbps. With openspeedtest download increased from 556Mbps to 873Mbps and upload from 727Mbps to 973Mbps - DiplayLink DL-6xxx family devices are not affected by increased buffer size. Signed-off-by: Łukasz Spintzyk Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220720060518.541-2-lukasz.spintzyk@synaptics.com Signed-off-by: Paolo Abeni --- include/linux/usb/cdc_ncm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index f7cb3ddce7fb..2d207cb4837d 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -53,8 +53,8 @@ #define USB_CDC_NCM_NDP32_LENGTH_MIN 0x20 /* Maximum NTB length */ -#define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ -#define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ +#define CDC_NCM_NTB_MAX_SIZE_TX 65536 /* bytes */ +#define CDC_NCM_NTB_MAX_SIZE_RX 65536 /* bytes */ /* Initial NTB length */ #define CDC_NCM_NTB_DEF_SIZE_TX 16384 /* bytes */ -- cgit v1.2.3 From ab21d6063c01180a8e9b22a37b847e5819525d9f Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:33 +0200 Subject: bpf: Introduce 8-byte BTF set Introduce support for defining flags for kfuncs using a new set of macros, BTF_SET8_START/BTF_SET8_END, which define a set which contains 8 byte elements (each of which consists of a pair of BTF ID and flags), using a new BTF_ID_FLAGS macro. This will be used to tag kfuncs registered for a certain program type as acquire, release, sleepable, ret_null, etc. without having to create more and more sets which was proving to be an unscalable solution. Now, when looking up whether a kfunc is allowed for a certain program, we can also obtain its kfunc flags in the same call and avoid further lookups. The resolve_btfids change is split into a separate patch. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 252a4befeab1..3cb0741e71d7 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -8,6 +8,15 @@ struct btf_id_set { u32 ids[]; }; +struct btf_id_set8 { + u32 cnt; + u32 flags; + struct { + u32 id; + u32 flags; + } pairs[]; +}; + #ifdef CONFIG_DEBUG_INFO_BTF #include /* for __PASTE */ @@ -25,7 +34,7 @@ struct btf_id_set { #define BTF_IDS_SECTION ".BTF_ids" -#define ____BTF_ID(symbol) \ +#define ____BTF_ID(symbol, word) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ ".local " #symbol " ; \n" \ @@ -33,10 +42,11 @@ asm( \ ".size " #symbol ", 4; \n" \ #symbol ": \n" \ ".zero 4 \n" \ +word \ ".popsection; \n"); -#define __BTF_ID(symbol) \ - ____BTF_ID(symbol) +#define __BTF_ID(symbol, word) \ + ____BTF_ID(symbol, word) #define __ID(prefix) \ __PASTE(prefix, __COUNTER__) @@ -46,7 +56,14 @@ asm( \ * to 4 zero bytes. */ #define BTF_ID(prefix, name) \ - __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__)) + __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__), "") + +#define ____BTF_ID_FLAGS(prefix, name, flags) \ + __BTF_ID(__ID(__BTF_ID__##prefix##__##name##__), ".long " #flags "\n") +#define __BTF_ID_FLAGS(prefix, name, flags, ...) \ + ____BTF_ID_FLAGS(prefix, name, flags) +#define BTF_ID_FLAGS(prefix, name, ...) \ + __BTF_ID_FLAGS(prefix, name, ##__VA_ARGS__, 0) /* * The BTF_ID_LIST macro defines pure (unsorted) list @@ -145,10 +162,51 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set name; +/* + * The BTF_SET8_START/END macros pair defines sorted list of + * BTF IDs and their flags plus its members count, with the + * following layout: + * + * BTF_SET8_START(list) + * BTF_ID_FLAGS(type1, name1, flags) + * BTF_ID_FLAGS(type2, name2, flags) + * BTF_SET8_END(list) + * + * __BTF_ID__set8__list: + * .zero 8 + * list: + * __BTF_ID__type1__name1__3: + * .zero 4 + * .word (1 << 0) | (1 << 2) + * __BTF_ID__type2__name2__5: + * .zero 4 + * .word (1 << 3) | (1 << 1) | (1 << 2) + * + */ +#define __BTF_SET8_START(name, scope) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +"." #scope " __BTF_ID__set8__" #name "; \n" \ +"__BTF_ID__set8__" #name ":; \n" \ +".zero 8 \n" \ +".popsection; \n"); + +#define BTF_SET8_START(name) \ +__BTF_ID_LIST(name, local) \ +__BTF_SET8_START(name, local) + +#define BTF_SET8_END(name) \ +asm( \ +".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ +".size __BTF_ID__set8__" #name ", .-" #name " \n" \ +".popsection; \n"); \ +extern struct btf_id_set8 name; + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) +#define BTF_ID_FLAGS(prefix, name, flags) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; @@ -156,6 +214,8 @@ extern struct btf_id_set name; #define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) +#define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; +#define BTF_SET8_END(name) static struct btf_id_set8 __maybe_unused name = { 0 }; #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From a4703e3184320d6e15e2bc81d2ccf1c8c883f9d1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:35 +0200 Subject: bpf: Switch to new kfunc flags infrastructure Instead of populating multiple sets to indicate some attribute and then researching the same BTF ID in them, prepare a single unified BTF set which indicates whether a kfunc is allowed to be called, and also its attributes if any at the same time. Now, only one call is needed to perform the lookup for both kfunc availability and its attributes. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +- include/linux/btf.h | 33 ++----- kernel/bpf/btf.c | 106 ++++++++++----------- kernel/bpf/verifier.c | 14 ++- net/bpf/test_run.c | 70 +++++--------- net/ipv4/bpf_tcp_ca.c | 18 ++-- net/ipv4/tcp_bbr.c | 24 ++--- net/ipv4/tcp_cubic.c | 20 ++-- net/ipv4/tcp_dctcp.c | 20 ++-- net/netfilter/nf_conntrack_bpf.c | 49 +++------- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 10 +- 11 files changed, 145 insertions(+), 222 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 11950029284f..a97751d845c9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1924,7 +1924,8 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs); + struct bpf_reg_state *regs, + u32 kfunc_flags); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, diff --git a/include/linux/btf.h b/include/linux/btf.h index 1bfed7fa0428..6dfc6eaf7f8c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,14 +12,11 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) -enum btf_kfunc_type { - BTF_KFUNC_TYPE_CHECK, - BTF_KFUNC_TYPE_ACQUIRE, - BTF_KFUNC_TYPE_RELEASE, - BTF_KFUNC_TYPE_RET_NULL, - BTF_KFUNC_TYPE_KPTR_ACQUIRE, - BTF_KFUNC_TYPE_MAX, -}; +/* These need to be macros, as the expressions are used in assembler input */ +#define KF_ACQUIRE (1 << 0) /* kfunc is an acquire function */ +#define KF_RELEASE (1 << 1) /* kfunc is a release function */ +#define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ +#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ struct btf; struct btf_member; @@ -30,16 +27,7 @@ struct btf_id_set; struct btf_kfunc_id_set { struct module *owner; - union { - struct { - struct btf_id_set *check_set; - struct btf_id_set *acquire_set; - struct btf_id_set *release_set; - struct btf_id_set *ret_null_set; - struct btf_id_set *kptr_acquire_set; - }; - struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; - }; + struct btf_id_set8 *set; }; struct btf_id_dtor_kfunc { @@ -378,9 +366,9 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); -bool btf_kfunc_id_set_contains(const struct btf *btf, +u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id); + u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); @@ -397,12 +385,11 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } -static inline bool btf_kfunc_id_set_contains(const struct btf *btf, +static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id) { - return false; + return NULL; } static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5869f03bcb6e..4d9c2d88720f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -213,7 +213,7 @@ enum { }; struct btf_kfunc_set_tab { - struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; + struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; }; struct btf_id_dtor_kfunc_tab { @@ -1616,7 +1616,7 @@ static void btf_free_id(struct btf *btf) static void btf_free_kfunc_set_tab(struct btf *btf) { struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; - int hook, type; + int hook; if (!tab) return; @@ -1625,10 +1625,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf) */ if (btf_is_module(btf)) goto free_tab; - for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) { - for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) - kfree(tab->sets[hook][type]); - } + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) + kfree(tab->sets[hook]); free_tab: kfree(tab); btf->kfunc_set_tab = NULL; @@ -6172,7 +6170,8 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf, static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - bool ptr_to_mem_ok) + bool ptr_to_mem_ok, + u32 kfunc_flags) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); struct bpf_verifier_log *log = &env->log; @@ -6210,10 +6209,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (is_kfunc) { /* Only kfunc can be release func */ - rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_RELEASE, func_id); - kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id); + rel = kfunc_flags & KF_RELEASE; + kptr_get = kfunc_flags & KF_KPTR_GET; } /* check that BTF function arguments match actual types that the @@ -6442,7 +6439,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, 0); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. @@ -6455,9 +6452,10 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs) + struct bpf_reg_state *regs, + u32 kfunc_flags) { - return btf_check_func_arg_match(env, btf, func_id, regs, true); + return btf_check_func_arg_match(env, btf, func_id, regs, true, kfunc_flags); } /* Convert BTF of a function into bpf_reg_state if possible @@ -6854,6 +6852,11 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) +{ + return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); +} + enum { BTF_MODULE_F_LIVE = (1 << 0), }; @@ -7102,16 +7105,16 @@ BTF_TRACING_TYPE_xxx /* Kernel Function (kfunc) BTF ID set registration API */ -static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, - enum btf_kfunc_type type, - struct btf_id_set *add_set, bool vmlinux_set) +static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + struct btf_id_set8 *add_set) { + bool vmlinux_set = !btf_is_module(btf); struct btf_kfunc_set_tab *tab; - struct btf_id_set *set; + struct btf_id_set8 *set; u32 set_cnt; int ret; - if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) { + if (hook >= BTF_KFUNC_HOOK_MAX) { ret = -EINVAL; goto end; } @@ -7127,7 +7130,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, btf->kfunc_set_tab = tab; } - set = tab->sets[hook][type]; + set = tab->sets[hook]; /* Warn when register_btf_kfunc_id_set is called twice for the same hook * for module sets. */ @@ -7141,7 +7144,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, * pointer and return. */ if (!vmlinux_set) { - tab->sets[hook][type] = add_set; + tab->sets[hook] = add_set; return 0; } @@ -7150,7 +7153,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, * and concatenate all individual sets being registered. While each set * is individually sorted, they may become unsorted when concatenated, * hence re-sorting the final set again is required to make binary - * searching the set using btf_id_set_contains function work. + * searching the set using btf_id_set8_contains function work. */ set_cnt = set ? set->cnt : 0; @@ -7165,8 +7168,8 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, } /* Grow set */ - set = krealloc(tab->sets[hook][type], - offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]), + set = krealloc(tab->sets[hook], + offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]), GFP_KERNEL | __GFP_NOWARN); if (!set) { ret = -ENOMEM; @@ -7174,15 +7177,15 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, } /* For newly allocated set, initialize set->cnt to 0 */ - if (!tab->sets[hook][type]) + if (!tab->sets[hook]) set->cnt = 0; - tab->sets[hook][type] = set; + tab->sets[hook] = set; /* Concatenate the two sets */ - memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0])); + memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); set->cnt += add_set->cnt; - sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL); + sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); return 0; end: @@ -7190,38 +7193,25 @@ end: return ret; } -static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, - const struct btf_kfunc_id_set *kset) -{ - bool vmlinux_set = !btf_is_module(btf); - int type, ret = 0; - - for (type = 0; type < ARRAY_SIZE(kset->sets); type++) { - if (!kset->sets[type]) - continue; - - ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set); - if (ret) - break; - } - return ret; -} - -static bool __btf_kfunc_id_set_contains(const struct btf *btf, +static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_hook hook, - enum btf_kfunc_type type, u32 kfunc_btf_id) { - struct btf_id_set *set; + struct btf_id_set8 *set; + u32 *id; - if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) - return false; + if (hook >= BTF_KFUNC_HOOK_MAX) + return NULL; if (!btf->kfunc_set_tab) - return false; - set = btf->kfunc_set_tab->sets[hook][type]; + return NULL; + set = btf->kfunc_set_tab->sets[hook]; if (!set) - return false; - return btf_id_set_contains(set, kfunc_btf_id); + return NULL; + id = btf_id_set8_contains(set, kfunc_btf_id); + if (!id) + return NULL; + /* The flags for BTF ID are located next to it */ + return id + 1; } static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) @@ -7249,14 +7239,14 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) * keeping the reference for the duration of the call provides the necessary * protection for looking up a well-formed btf->kfunc_set_tab. */ -bool btf_kfunc_id_set_contains(const struct btf *btf, +u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id) + u32 kfunc_btf_id) { enum btf_kfunc_hook hook; hook = bpf_prog_type_to_kfunc_hook(prog_type); - return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id); + return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); } /* This function must be invoked only from initcalls/module init functions */ @@ -7283,7 +7273,7 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, return PTR_ERR(btf); hook = bpf_prog_type_to_kfunc_hook(prog_type); - ret = btf_populate_kfunc_set(btf, hook, kset); + ret = btf_populate_kfunc_set(btf, hook, kset->set); btf_put(btf); return ret; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7c1e056624f9..096fdac70165 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7562,6 +7562,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; + u32 *kfunc_flags; bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ @@ -7577,18 +7578,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_name = btf_name_by_offset(desc_btf, func->name_off); func_proto = btf_type_by_id(desc_btf, func->type); - if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_CHECK, func_id)) { + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); + if (!kfunc_flags) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } - - acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_ACQUIRE, func_id); + acq = *kfunc_flags & KF_ACQUIRE; /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, *kfunc_flags); if (err < 0) return err; /* In case of release function, we get register number of refcounted @@ -7632,8 +7631,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; - if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_RET_NULL, func_id)) { + if (*kfunc_flags & KF_RET_NULL) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ regs[BPF_REG_0].id = ++env->id_gen; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dc9dc0bedca0..ca5b7234a350 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -695,48 +695,26 @@ __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); -BTF_SET_START(test_sk_check_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test1) -BTF_ID(func, bpf_kfunc_call_test2) -BTF_ID(func, bpf_kfunc_call_test3) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_release) -BTF_ID(func, bpf_kfunc_call_memb_release) -BTF_ID(func, bpf_kfunc_call_memb1_release) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_ID(func, bpf_kfunc_call_test_pass_ctx) -BTF_ID(func, bpf_kfunc_call_test_pass1) -BTF_ID(func, bpf_kfunc_call_test_pass2) -BTF_ID(func, bpf_kfunc_call_test_fail1) -BTF_ID(func, bpf_kfunc_call_test_fail2) -BTF_ID(func, bpf_kfunc_call_test_fail3) -BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1) -BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1) -BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_SET_END(test_sk_check_kfunc_ids) - -BTF_SET_START(test_sk_acquire_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_acquire_kfunc_ids) - -BTF_SET_START(test_sk_release_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_release) -BTF_ID(func, bpf_kfunc_call_memb_release) -BTF_ID(func, bpf_kfunc_call_memb1_release) -BTF_SET_END(test_sk_release_kfunc_ids) - -BTF_SET_START(test_sk_ret_null_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_ret_null_kfunc_ids) - -BTF_SET_START(test_sk_kptr_acquire_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_kptr_acquire_kfunc_ids) +BTF_SET8_START(test_sk_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_kfunc_call_test1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) @@ -1620,12 +1598,8 @@ out: } static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &test_sk_check_kfunc_ids, - .acquire_set = &test_sk_acquire_kfunc_ids, - .release_set = &test_sk_release_kfunc_ids, - .ret_null_set = &test_sk_ret_null_kfunc_ids, - .kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids + .owner = THIS_MODULE, + .set = &test_sk_check_kfunc_ids, }; BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 7a181631b995..85a9e500c42d 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -197,17 +197,17 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET_START(bpf_tcp_ca_check_kfunc_ids) -BTF_ID(func, tcp_reno_ssthresh) -BTF_ID(func, tcp_reno_cong_avoid) -BTF_ID(func, tcp_reno_undo_cwnd) -BTF_ID(func, tcp_slow_start) -BTF_ID(func, tcp_cong_avoid_ai) -BTF_SET_END(bpf_tcp_ca_check_kfunc_ids) +BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) +BTF_ID_FLAGS(func, tcp_reno_ssthresh) +BTF_ID_FLAGS(func, tcp_reno_cong_avoid) +BTF_ID_FLAGS(func, tcp_reno_undo_cwnd) +BTF_ID_FLAGS(func, tcp_slow_start) +BTF_ID_FLAGS(func, tcp_cong_avoid_ai) +BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_tcp_ca_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_tcp_ca_check_kfunc_ids, }; static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 075e744bfb48..54eec33c6e1c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1154,24 +1154,24 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET_START(tcp_bbr_check_kfunc_ids) +BTF_SET8_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, bbr_init) -BTF_ID(func, bbr_main) -BTF_ID(func, bbr_sndbuf_expand) -BTF_ID(func, bbr_undo_cwnd) -BTF_ID(func, bbr_cwnd_event) -BTF_ID(func, bbr_ssthresh) -BTF_ID(func, bbr_min_tso_segs) -BTF_ID(func, bbr_set_state) +BTF_ID_FLAGS(func, bbr_init) +BTF_ID_FLAGS(func, bbr_main) +BTF_ID_FLAGS(func, bbr_sndbuf_expand) +BTF_ID_FLAGS(func, bbr_undo_cwnd) +BTF_ID_FLAGS(func, bbr_cwnd_event) +BTF_ID_FLAGS(func, bbr_ssthresh) +BTF_ID_FLAGS(func, bbr_min_tso_segs) +BTF_ID_FLAGS(func, bbr_set_state) #endif #endif -BTF_SET_END(tcp_bbr_check_kfunc_ids) +BTF_SET8_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_bbr_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_bbr_check_kfunc_ids, }; static int __init bbr_register(void) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 68178e7280ce..768c10c1f649 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,22 +485,22 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET_START(tcp_cubic_check_kfunc_ids) +BTF_SET8_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, cubictcp_init) -BTF_ID(func, cubictcp_recalc_ssthresh) -BTF_ID(func, cubictcp_cong_avoid) -BTF_ID(func, cubictcp_state) -BTF_ID(func, cubictcp_cwnd_event) -BTF_ID(func, cubictcp_acked) +BTF_ID_FLAGS(func, cubictcp_init) +BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) +BTF_ID_FLAGS(func, cubictcp_cong_avoid) +BTF_ID_FLAGS(func, cubictcp_state) +BTF_ID_FLAGS(func, cubictcp_cwnd_event) +BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif -BTF_SET_END(tcp_cubic_check_kfunc_ids) +BTF_SET8_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_cubic_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_cubic_check_kfunc_ids, }; static int __init cubictcp_register(void) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index ab034a4e9324..2a6c0dd665a4 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -239,22 +239,22 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET_START(tcp_dctcp_check_kfunc_ids) +BTF_SET8_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, dctcp_init) -BTF_ID(func, dctcp_update_alpha) -BTF_ID(func, dctcp_cwnd_event) -BTF_ID(func, dctcp_ssthresh) -BTF_ID(func, dctcp_cwnd_undo) -BTF_ID(func, dctcp_state) +BTF_ID_FLAGS(func, dctcp_init) +BTF_ID_FLAGS(func, dctcp_update_alpha) +BTF_ID_FLAGS(func, dctcp_cwnd_event) +BTF_ID_FLAGS(func, dctcp_ssthresh) +BTF_ID_FLAGS(func, dctcp_cwnd_undo) +BTF_ID_FLAGS(func, dctcp_state) #endif #endif -BTF_SET_END(tcp_dctcp_check_kfunc_ids) +BTF_SET8_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_dctcp_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_dctcp_check_kfunc_ids, }; static int __init dctcp_register(void) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index bc4d5cd63a94..cf2096f65d0e 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -219,48 +219,21 @@ void bpf_ct_release(struct nf_conn *nfct) __diag_pop() -BTF_SET_START(nf_ct_xdp_check_kfunc_ids) -BTF_ID(func, bpf_xdp_ct_lookup) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_xdp_check_kfunc_ids) - -BTF_SET_START(nf_ct_tc_check_kfunc_ids) -BTF_ID(func, bpf_skb_ct_lookup) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_tc_check_kfunc_ids) - -BTF_SET_START(nf_ct_acquire_kfunc_ids) -BTF_ID(func, bpf_xdp_ct_lookup) -BTF_ID(func, bpf_skb_ct_lookup) -BTF_SET_END(nf_ct_acquire_kfunc_ids) - -BTF_SET_START(nf_ct_release_kfunc_ids) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_release_kfunc_ids) - -/* Both sets are identical */ -#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids - -static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &nf_ct_xdp_check_kfunc_ids, - .acquire_set = &nf_ct_acquire_kfunc_ids, - .release_set = &nf_ct_release_kfunc_ids, - .ret_null_set = &nf_ct_ret_null_kfunc_ids, -}; - -static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &nf_ct_tc_check_kfunc_ids, - .acquire_set = &nf_ct_acquire_kfunc_ids, - .release_set = &nf_ct_release_kfunc_ids, - .ret_null_set = &nf_ct_ret_null_kfunc_ids, +BTF_SET8_START(nf_ct_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE) +BTF_SET8_END(nf_ct_kfunc_set) + +static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_ct_kfunc_set, }; int register_nf_conntrack_bpf(void) { int ret; - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index e585e1cefc77..792cb15bac40 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -148,13 +148,13 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_check_kfunc_ids) -BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_check_kfunc_ids) +BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_testmod_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_testmod_check_kfunc_ids, }; extern int bpf_fentry_test1(int a); -- cgit v1.2.3 From 56e948ffc098a780fefb6c1784a3a2c7b81100a1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:36 +0200 Subject: bpf: Add support for forcing kfunc args to be trusted Teach the verifier to detect a new KF_TRUSTED_ARGS kfunc flag, which means each pointer argument must be trusted, which we define as a pointer that is referenced (has non-zero ref_obj_id) and also needs to have its offset unchanged, similar to how release functions expect their argument. This allows a kfunc to receive pointer arguments unchanged from the result of the acquire kfunc. This is required to ensure that kfunc that operate on some object only work on acquired pointers and not normal PTR_TO_BTF_ID with same type which can be obtained by pointer walking. The restrictions applied to release arguments also apply to trusted arguments. This implies that strict type matching (not deducing type by recursively following members at offset) and OBJ_RELEASE offset checks (ensuring they are zero) are used for trusted pointer arguments. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 32 ++++++++++++++++++++++++++++++++ kernel/bpf/btf.c | 17 ++++++++++++++--- net/bpf/test_run.c | 5 +++++ 3 files changed, 51 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 6dfc6eaf7f8c..cdb376d53238 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -17,6 +17,38 @@ #define KF_RELEASE (1 << 1) /* kfunc is a release function */ #define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ #define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ +/* Trusted arguments are those which are meant to be referenced arguments with + * unchanged offset. It is used to enforce that pointers obtained from acquire + * kfuncs remain unmodified when being passed to helpers taking trusted args. + * + * Consider + * struct foo { + * int data; + * struct foo *next; + * }; + * + * struct bar { + * int data; + * struct foo f; + * }; + * + * struct foo *f = alloc_foo(); // Acquire kfunc + * struct bar *b = alloc_bar(); // Acquire kfunc + * + * If a kfunc set_foo_data() wants to operate only on the allocated object, it + * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like: + * + * set_foo_data(f, 42); // Allowed + * set_foo_data(f->next, 42); // Rejected, non-referenced pointer + * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type + * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset + * + * In the final case, usually for the purposes of type matching, it is deduced + * by looking at the type of the member at the offset, but due to the + * requirement of trusted argument, this deduction will be strict and not done + * for this case. + */ +#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ struct btf; struct btf_member; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4d9c2d88720f..7ac971ea98d1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6174,10 +6174,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, u32 kfunc_flags) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + bool rel = false, kptr_get = false, trusted_arg = false; struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); - bool rel = false, kptr_get = false; const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; const struct btf_param *args; @@ -6211,6 +6211,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, /* Only kfunc can be release func */ rel = kfunc_flags & KF_RELEASE; kptr_get = kfunc_flags & KF_KPTR_GET; + trusted_arg = kfunc_flags & KF_TRUSTED_ARGS; } /* check that BTF function arguments match actual types that the @@ -6235,10 +6236,19 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + /* Check if argument must be a referenced pointer, args + i has + * been verified to be a pointer (after skipping modifiers). + */ + if (is_kfunc && trusted_arg && !reg->ref_obj_id) { + bpf_log(log, "R%d must be referenced\n", regno); + return -EINVAL; + } + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - if (rel && reg->ref_obj_id) + /* Trusted args have the same offset checks as release arguments */ + if (trusted_arg || (rel && reg->ref_obj_id)) arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) @@ -6336,7 +6346,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, - reg->off, btf, ref_id, rel && reg->ref_obj_id)) { + reg->off, btf, ref_id, + trusted_arg || (rel && reg->ref_obj_id))) { bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", func_name, i, btf_type_str(ref_t), ref_tname, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index ca5b7234a350..cbc9cd5058cb 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -691,6 +691,10 @@ noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) { } +noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) +{ +} + __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); @@ -714,6 +718,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS) BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, -- cgit v1.2.3 From 949d6b405e6160ae44baea39192d67b39cb7eeac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 20 Jul 2022 16:57:58 -0700 Subject: net: add missing includes and forward declarations under net/ This patch adds missing includes to headers under include/net. All these problems are currently masked by the existing users including the missing dependency before the broken header. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/lapb.h | 5 +++++ include/net/af_vsock.h | 1 + include/net/amt.h | 3 +++ include/net/ax88796.h | 2 ++ include/net/bond_options.h | 8 ++++++++ include/net/codel_qdisc.h | 1 + include/net/datalink.h | 7 +++++++ include/net/dcbevent.h | 2 ++ include/net/dcbnl.h | 2 ++ include/net/dn_dev.h | 1 + include/net/dn_fib.h | 2 ++ include/net/dn_neigh.h | 2 ++ include/net/dn_nsp.h | 6 ++++++ include/net/dn_route.h | 3 +++ include/net/erspan.h | 3 +++ include/net/esp.h | 1 + include/net/ethoc.h | 3 +++ include/net/firewire.h | 2 ++ include/net/fq.h | 4 ++++ include/net/garp.h | 2 ++ include/net/gtp.h | 4 ++++ include/net/gue.h | 3 +++ include/net/hwbm.h | 2 ++ include/net/ila.h | 2 ++ include/net/inet6_connection_sock.h | 2 ++ include/net/inet_common.h | 6 ++++++ include/net/inet_frag.h | 3 +++ include/net/ip6_route.h | 20 ++++++++++---------- include/net/ipcomp.h | 2 ++ include/net/ipconfig.h | 2 ++ include/net/llc_c_ac.h | 7 +++++++ include/net/llc_c_st.h | 4 ++++ include/net/llc_s_ac.h | 4 ++++ include/net/llc_s_ev.h | 1 + include/net/mpls_iptunnel.h | 3 +++ include/net/mrp.h | 4 ++++ include/net/ncsi.h | 2 ++ include/net/netevent.h | 1 + include/net/netns/can.h | 1 + include/net/netns/core.h | 2 ++ include/net/netns/generic.h | 1 + include/net/netns/ipv4.h | 1 + include/net/netns/mctp.h | 1 + include/net/netns/mpls.h | 2 ++ include/net/netns/nexthop.h | 1 + include/net/netns/sctp.h | 3 +++ include/net/netns/unix.h | 2 ++ include/net/netrom.h | 1 + include/net/p8022.h | 5 +++++ include/net/phonet/pep.h | 3 +++ include/net/phonet/phonet.h | 4 ++++ include/net/phonet/pn_dev.h | 5 +++++ include/net/pptp.h | 3 +++ include/net/psnap.h | 5 +++++ include/net/regulatory.h | 3 +++ include/net/rose.h | 1 + include/net/secure_seq.h | 2 ++ include/net/smc.h | 7 +++++++ include/net/stp.h | 2 ++ include/net/transp_v6.h | 2 ++ include/net/tun_proto.h | 3 ++- include/net/udplite.h | 1 + include/net/xdp_priv.h | 1 + 63 files changed, 183 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lapb.h b/include/linux/lapb.h index eb56472f23b2..b5333f9413dc 100644 --- a/include/linux/lapb.h +++ b/include/linux/lapb.h @@ -6,6 +6,11 @@ #ifndef LAPB_KERNEL_H #define LAPB_KERNEL_H +#include +#include + +struct net_device; + #define LAPB_OK 0 #define LAPB_BADTOKEN 1 #define LAPB_INVALUE 2 diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f742e50207fb..1c53c4c4d88f 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -10,6 +10,7 @@ #include #include +#include #include #include "vsock_addr.h" diff --git a/include/net/amt.h b/include/net/amt.h index 08fc30cf2f34..c881bc8b673b 100644 --- a/include/net/amt.h +++ b/include/net/amt.h @@ -7,6 +7,9 @@ #include #include +#include +#include +#include enum amt_msg_type { AMT_MSG_DISCOVERY = 1, diff --git a/include/net/ax88796.h b/include/net/ax88796.h index 2ed23a368602..b658471f97f0 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -8,6 +8,8 @@ #ifndef __NET_AX88796_PLAT_H #define __NET_AX88796_PLAT_H +#include + struct sk_buff; struct net_device; struct platform_device; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index d2aea5cf1e41..69292ecc0325 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -7,6 +7,14 @@ #ifndef _NET_BOND_OPTIONS_H #define _NET_BOND_OPTIONS_H +#include +#include +#include +#include + +struct netlink_ext_ack; +struct nlattr; + #define BOND_OPT_MAX_NAMELEN 32 #define BOND_OPT_VALID(opt) ((opt) < BOND_OPT_LAST) #define BOND_MODE_ALL_EX(x) (~(x)) diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 58b6d0ebea10..7d3d9219f4fe 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -49,6 +49,7 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include #include /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ diff --git a/include/net/datalink.h b/include/net/datalink.h index d9b7faaa539f..c837ffc7ebf8 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -2,6 +2,13 @@ #ifndef _NET_INET_DATALINK_H_ #define _NET_INET_DATALINK_H_ +#include + +struct llc_sap; +struct net_device; +struct packet_type; +struct sk_buff; + struct datalink_proto { unsigned char type[8]; diff --git a/include/net/dcbevent.h b/include/net/dcbevent.h index 43e34131a53f..02700262f71a 100644 --- a/include/net/dcbevent.h +++ b/include/net/dcbevent.h @@ -8,6 +8,8 @@ #ifndef _DCB_EVENT_H #define _DCB_EVENT_H +struct notifier_block; + enum dcbevent_notif_type { DCB_APP_EVENT = 1, }; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index e4ad58c4062c..2b2d86fb3131 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -10,6 +10,8 @@ #include +struct net_device; + struct dcb_app_type { int ifindex; struct dcb_app app; diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 595b4f6c1eb1..bec303ea8367 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -2,6 +2,7 @@ #ifndef _NET_DN_DEV_H #define _NET_DN_DEV_H +#include struct dn_dev; diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index ddd6565957b3..1929a3cd5ebe 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -4,6 +4,8 @@ #include #include +#include +#include extern const struct nla_policy rtm_dn_policy[]; diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h index 2e3e7793973a..1f7df98bfc33 100644 --- a/include/net/dn_neigh.h +++ b/include/net/dn_neigh.h @@ -2,6 +2,8 @@ #ifndef _NET_DN_NEIGH_H #define _NET_DN_NEIGH_H +#include + /* * The position of the first two fields of * this structure are critical - SJW diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index f83932b864a9..a4a18fee0b7c 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -6,6 +6,12 @@ *******************************************************************************/ /* dn_nsp.c functions prototyping */ +#include +#include +#include + +struct sk_buff; +struct sk_buff_head; void dn_nsp_send_data_ack(struct sock *sk); void dn_nsp_send_oth_ack(struct sock *sk); diff --git a/include/net/dn_route.h b/include/net/dn_route.h index 6f1e94ac0bdf..88c0300236cc 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -7,6 +7,9 @@ *******************************************************************************/ +#include +#include + struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *, struct sock *sk, int flags); diff --git a/include/net/erspan.h b/include/net/erspan.h index 0d9e86bd9893..6cb4cbd6a48f 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -58,6 +58,9 @@ * GRE proto ERSPAN type I/II = 0x88BE, type III = 0x22EB */ +#include +#include +#include #include #define ERSPAN_VERSION 0x1 /* ERSPAN type II */ diff --git a/include/net/esp.h b/include/net/esp.h index 9c5637d41d95..322950727dd0 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -5,6 +5,7 @@ #include struct ip_esp_hdr; +struct xfrm_state; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) { diff --git a/include/net/ethoc.h b/include/net/ethoc.h index 78519ed42ab4..73810f3ca492 100644 --- a/include/net/ethoc.h +++ b/include/net/ethoc.h @@ -10,6 +10,9 @@ #ifndef LINUX_NET_ETHOC_H #define LINUX_NET_ETHOC_H 1 +#include +#include + struct ethoc_platform_data { u8 hwaddr[IFHWADDRLEN]; s8 phy_id; diff --git a/include/net/firewire.h b/include/net/firewire.h index 299e5df38552..2442d645e412 100644 --- a/include/net/firewire.h +++ b/include/net/firewire.h @@ -2,6 +2,8 @@ #ifndef _NET_FIREWIRE_H #define _NET_FIREWIRE_H +#include + /* Pseudo L2 address */ #define FWNET_ALEN 16 union fwnet_hwaddr { diff --git a/include/net/fq.h b/include/net/fq.h index 2eccbbd2b559..07b5aff6ec58 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -7,6 +7,10 @@ #ifndef __NET_SCHED_FQ_H #define __NET_SCHED_FQ_H +#include +#include +#include + struct fq_tin; /** diff --git a/include/net/garp.h b/include/net/garp.h index 4d9a0c6a2e5f..59a07b171def 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -2,6 +2,8 @@ #ifndef _NET_GARP_H #define _NET_GARP_H +#include +#include #include #define GARP_PROTOCOL_ID 0x1 diff --git a/include/net/gtp.h b/include/net/gtp.h index c1d6169df331..2a503f035d18 100644 --- a/include/net/gtp.h +++ b/include/net/gtp.h @@ -2,6 +2,10 @@ #ifndef _GTP_H_ #define _GTP_H_ +#include +#include +#include + /* General GTP protocol related definitions. */ #define GTP0_PORT 3386 diff --git a/include/net/gue.h b/include/net/gue.h index e42402f180b7..dfca298bec9c 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -30,6 +30,9 @@ * may refer to options placed after this field. */ +#include +#include + struct guehdr { union { struct { diff --git a/include/net/hwbm.h b/include/net/hwbm.h index c81444611a22..aa495decec35 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -2,6 +2,8 @@ #ifndef _HWBM_H #define _HWBM_H +#include + struct hwbm_pool { /* Capacity of the pool */ int size; diff --git a/include/net/ila.h b/include/net/ila.h index f98dcd5791b0..73ebe5eab272 100644 --- a/include/net/ila.h +++ b/include/net/ila.h @@ -8,6 +8,8 @@ #ifndef _NET_ILA_H #define _NET_ILA_H +struct sk_buff; + int ila_xlat_outgoing(struct sk_buff *skb); int ila_xlat_incoming(struct sk_buff *skb); diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 7392f959a405..025bd8d3c769 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -11,6 +11,8 @@ #include +struct flowi; +struct flowi6; struct request_sock; struct sk_buff; struct sock; diff --git a/include/net/inet_common.h b/include/net/inet_common.h index cad2a611efde..cec453c18f1d 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -3,6 +3,10 @@ #define _INET_COMMON_H #include +#include +#include +#include +#include extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; @@ -12,6 +16,8 @@ extern const struct proto_ops inet_dgram_ops; */ struct msghdr; +struct net; +struct page; struct sock; struct sockaddr; struct socket; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 911ad930867d..0b0876610553 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -4,6 +4,9 @@ #include #include +#include +#include +#include /* Per netns frag queues directory */ struct fqdir { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ca2d6b60e1ec..035d61d50a98 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -2,6 +2,16 @@ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H +#include +#include +#include +#include +#include +#include +#include +#include +#include + struct route_info { __u8 type; __u8 length; @@ -19,16 +29,6 @@ struct route_info { __u8 prefix[]; /* 0,8 or 16 */ }; -#include -#include -#include -#include -#include -#include -#include -#include -#include - #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 #define RT6_LOOKUP_F_HAS_SADDR 0x00000004 diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index fee6fc451597..c31108295079 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -2,11 +2,13 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include #include #define IPCOMP_SCRATCH_SIZE 65400 struct crypto_comp; +struct ip_comp_hdr; struct ipcomp_data { u16 threshold; diff --git a/include/net/ipconfig.h b/include/net/ipconfig.h index e3534299bd2a..8276897d0c2e 100644 --- a/include/net/ipconfig.h +++ b/include/net/ipconfig.h @@ -7,6 +7,8 @@ /* The following are initdata: */ +#include + extern int ic_proto_enabled; /* Protocols enabled (see IC_xxx) */ extern int ic_set_manually; /* IPconfig parameters set manually */ diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h index e766300b3e99..3e1f76786d7b 100644 --- a/include/net/llc_c_ac.h +++ b/include/net/llc_c_ac.h @@ -16,6 +16,13 @@ * Connection state transition actions * (Fb = F bit; Pb = P bit; Xb = X bit) */ + +#include + +struct sk_buff; +struct sock; +struct timer_list; + #define LLC_CONN_AC_CLR_REMOTE_BUSY 1 #define LLC_CONN_AC_CONN_IND 2 #define LLC_CONN_AC_CONN_CONFIRM 3 diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h index 48f3f891b2f9..53823d61d8b6 100644 --- a/include/net/llc_c_st.h +++ b/include/net/llc_c_st.h @@ -11,6 +11,10 @@ * * See the GNU General Public License for more details. */ + +#include +#include + /* Connection component state management */ /* connection states */ #define LLC_CONN_OUT_OF_SVC 0 /* prior to allocation */ diff --git a/include/net/llc_s_ac.h b/include/net/llc_s_ac.h index a61b98c108ee..f71790305bc9 100644 --- a/include/net/llc_s_ac.h +++ b/include/net/llc_s_ac.h @@ -11,6 +11,10 @@ * * See the GNU General Public License for more details. */ + +struct llc_sap; +struct sk_buff; + /* SAP component actions */ #define SAP_ACT_UNITDATA_IND 1 #define SAP_ACT_SEND_UI 2 diff --git a/include/net/llc_s_ev.h b/include/net/llc_s_ev.h index 84db3a59ed28..fb7df1d70af3 100644 --- a/include/net/llc_s_ev.h +++ b/include/net/llc_s_ev.h @@ -13,6 +13,7 @@ */ #include +#include /* Defines SAP component events */ /* Types of events (possible values in 'ev->type') */ diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index 9deb3a3735da..0c71c27979fb 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -6,6 +6,9 @@ #ifndef _NET_MPLS_IPTUNNEL_H #define _NET_MPLS_IPTUNNEL_H 1 +#include +#include + struct mpls_iptunnel_encap { u8 labels; u8 ttl_propagate; diff --git a/include/net/mrp.h b/include/net/mrp.h index 1c308c034e1a..92cd3fb6cf9d 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -2,6 +2,10 @@ #ifndef _NET_MRP_H #define _NET_MRP_H +#include +#include +#include + #define MRP_END_MARK 0x0 struct mrp_pdu_hdr { diff --git a/include/net/ncsi.h b/include/net/ncsi.h index fbefe80361ee..08a50d9acb0a 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -2,6 +2,8 @@ #ifndef __NET_NCSI_H #define __NET_NCSI_H +#include + /* * The NCSI device states seen from external. More NCSI device states are * only visible internally (in net/ncsi/internal.h). When the NCSI device diff --git a/include/net/netevent.h b/include/net/netevent.h index 4107016c3bb4..1be3757a8b7f 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -14,6 +14,7 @@ struct dst_entry; struct neighbour; +struct notifier_block ; struct netevent_redirect { struct dst_entry *old; diff --git a/include/net/netns/can.h b/include/net/netns/can.h index 52fbd8291a96..48b79f7e6236 100644 --- a/include/net/netns/can.h +++ b/include/net/netns/can.h @@ -7,6 +7,7 @@ #define __NETNS_CAN_H__ #include +#include struct can_dev_rcv_lists; struct can_pkg_stats; diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 388244e315e7..8249060cf5d0 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -2,6 +2,8 @@ #ifndef __NETNS_CORE_H__ #define __NETNS_CORE_H__ +#include + struct ctl_table_header; struct prot_inuse; diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 8a1ab47c3fb3..7ce68183f6e1 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -8,6 +8,7 @@ #include #include +#include /* * Generic net pointers are to be used by modules to put some private diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ce0cc4e8d8c7..c7320ef356d9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ #include #include #include +#include #include struct ctl_table_header; diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h index acedef12a35e..1db8f9aaddb4 100644 --- a/include/net/netns/mctp.h +++ b/include/net/netns/mctp.h @@ -6,6 +6,7 @@ #ifndef __NETNS_MCTP_H__ #define __NETNS_MCTP_H__ +#include #include struct netns_mctp { diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index a7bdcfbb0b28..19ad2574b267 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -6,6 +6,8 @@ #ifndef __NETNS_MPLS_H__ #define __NETNS_MPLS_H__ +#include + struct mpls_route; struct ctl_table_header; diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h index 1849e77eb68a..434239b37014 100644 --- a/include/net/netns/nexthop.h +++ b/include/net/netns/nexthop.h @@ -6,6 +6,7 @@ #ifndef __NETNS_NEXTHOP_H__ #define __NETNS_NEXTHOP_H__ +#include #include struct netns_nexthop { diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 40240722cdca..a681147aecd8 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -2,6 +2,9 @@ #ifndef __NETNS_SCTP_H__ #define __NETNS_SCTP_H__ +#include +#include + struct sock; struct proc_dir_entry; struct sctp_mib; diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 6f1a33df061d..9859d134d5a8 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -5,6 +5,8 @@ #ifndef __NETNS_UNIX_H__ #define __NETNS_UNIX_H__ +#include + struct unix_table { spinlock_t *locks; struct hlist_head *buckets; diff --git a/include/net/netrom.h b/include/net/netrom.h index 80f15b1c1a48..f0565a5987d1 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -14,6 +14,7 @@ #include #include #include +#include #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 diff --git a/include/net/p8022.h b/include/net/p8022.h index c2bacc66bfbc..b690ffcad66b 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -1,6 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_P8022_H #define _NET_P8022_H + +struct net_device; +struct packet_type; +struct sk_buff; + struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 27b1ab5e4e6d..645dddf5ce77 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -10,6 +10,9 @@ #ifndef NET_PHONET_PEP_H #define NET_PHONET_PEP_H +#include +#include + struct pep_sock { struct pn_sock pn_sk; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index a27bdc6cfeab..862f1719b523 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -10,6 +10,10 @@ #ifndef AF_PHONET_H #define AF_PHONET_H +#include +#include +#include + /* * The lower layers may not require more space, ever. Make sure it's * enough. diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 05b49d4d2b11..e9dc8dca5817 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -10,6 +10,11 @@ #ifndef PN_DEV_H #define PN_DEV_H +#include +#include + +struct net; + struct phonet_device_list { struct list_head list; struct mutex lock; diff --git a/include/net/pptp.h b/include/net/pptp.h index 383e25ca53a7..e63176bdd4c8 100644 --- a/include/net/pptp.h +++ b/include/net/pptp.h @@ -2,6 +2,9 @@ #ifndef _NET_PPTP_H #define _NET_PPTP_H +#include +#include + #define PPP_LCP_ECHOREQ 0x09 #define PPP_LCP_ECHOREP 0x0A #define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) diff --git a/include/net/psnap.h b/include/net/psnap.h index 7cb0c8ab4171..88802b0754ad 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -2,6 +2,11 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H +struct datalink_proto; +struct sk_buff; +struct packet_type; +struct net_device; + struct datalink_proto * register_snap_client(const unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47f06f6f5a67..896191f420d5 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -1,3 +1,4 @@ + #ifndef __NET_REGULATORY_H #define __NET_REGULATORY_H /* @@ -19,6 +20,8 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include +#include #include /** diff --git a/include/net/rose.h b/include/net/rose.h index 0f0a4ce0fee7..f192a64ddef2 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -9,6 +9,7 @@ #define _ROSE_H #include +#include #include #define ROSE_ADDR_LEN 5 diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index dac91aa38c5a..21e7fa2a1813 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,6 +4,8 @@ #include +struct net; + u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); diff --git a/include/net/smc.h b/include/net/smc.h index e441aa97ad61..37f829d9c6e5 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -11,6 +11,13 @@ #ifndef _SMC_H #define _SMC_H +#include +#include +#include +#include + +struct sock; + #define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ struct smc_hashinfo { diff --git a/include/net/stp.h b/include/net/stp.h index 2914e6d53490..528103fce2c0 100644 --- a/include/net/stp.h +++ b/include/net/stp.h @@ -2,6 +2,8 @@ #ifndef _NET_STP_H #define _NET_STP_H +#include + struct stp_proto { unsigned char group_address[ETH_ALEN]; void (*rcv)(const struct stp_proto *, struct sk_buff *, diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index da06613c9603..b830463e3dff 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -3,6 +3,7 @@ #define _TRANSP_V6_H #include +#include /* IPv6 transport protocols */ extern struct proto rawv6_prot; @@ -12,6 +13,7 @@ extern struct proto tcpv6_prot; extern struct proto pingv6_prot; struct flowi6; +struct ipcm6_cookie; /* extension headers */ int ipv6_exthdrs_init(void); diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h index 2ea3deba4c99..7b0de7852908 100644 --- a/include/net/tun_proto.h +++ b/include/net/tun_proto.h @@ -1,7 +1,8 @@ #ifndef __NET_TUN_PROTO_H #define __NET_TUN_PROTO_H -#include +#include +#include /* One byte protocol values as defined by VXLAN-GPE and NSH. These will * hopefully get a shared IANA registry. diff --git a/include/net/udplite.h b/include/net/udplite.h index a3c53110d30b..0143b373602e 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -6,6 +6,7 @@ #define _UDPLITE_H #include +#include /* UDP-Lite socket options */ #define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h index a2d58b1a12e1..c9df68d5f258 100644 --- a/include/net/xdp_priv.h +++ b/include/net/xdp_priv.h @@ -3,6 +3,7 @@ #define __LINUX_NET_XDP_PRIV_H__ #include +#include /* Private to net/core/xdp.c, but used by trace/events/xdp.h */ struct xdp_mem_allocator { -- cgit v1.2.3 From 0903f899418ee0235e4ad756f5502162f29b49b9 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Thu, 27 Jan 2022 14:39:46 +0200 Subject: wifi: ieee80211: add helper functions for detecting TM/FTM frames Add helper functions for detection timing measurement and fine timing measurement frames. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cca564372d16..55e6f4ad0ca6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1332,6 +1332,15 @@ struct ieee80211_mgmt { u8 action_code; u8 variable[]; } __packed s1g; + struct { + u8 action_code; + u8 dialog_token; + u8 follow_up; + u32 tod; + u32 toa; + u8 max_tod_error; + u8 max_toa_error; + } __packed wnm_timing_msr; } u; } __packed action; } u; @@ -3522,6 +3531,17 @@ enum ieee80211_mesh_actioncode { WLAN_MESH_ACTION_TBTT_ADJUSTMENT_RESPONSE, }; +/* Unprotected WNM action codes */ +enum ieee80211_unprotected_wnm_actioncode { + WLAN_UNPROTECTED_WNM_ACTION_TIM = 0, + WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, +}; + +/* Public action codes */ +enum ieee80211_public_actioncode { + WLAN_PUBLIC_ACTION_FTM_RESPONSE = 33, +}; + /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, @@ -4311,6 +4331,40 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) return true; } +static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (skb->len < IEEE80211_MIN_ACTION_SIZE) + return false; + + if (!ieee80211_is_action(mgmt->frame_control)) + return false; + + if (mgmt->u.action.category == WLAN_CATEGORY_WNM_UNPROTECTED && + mgmt->u.action.u.wnm_timing_msr.action_code == + WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE && + skb->len >= offsetofend(typeof(*mgmt), u.action.u.wnm_timing_msr)) + return true; + + return false; +} + +static inline bool ieee80211_is_ftm(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (!ieee80211_is_public_action((void *)mgmt, skb->len)) + return false; + + if (mgmt->u.action.u.ftm.action_code == + WLAN_PUBLIC_ACTION_FTM_RESPONSE && + skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm)) + return true; + + return false; +} + struct element { u8 id; u8 datalen; -- cgit v1.2.3 From e423414375866a399ebbe55ed044acb39846e8bf Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 22 Jul 2022 13:36:05 +0200 Subject: bpf: Fix build error in case of !CONFIG_DEBUG_INFO_BTF BTF_ID_FLAGS macro needs to be able to take 0 or 1 args, so make it a variable argument. BTF_SET8_END is incorrect, it should just be empty. Reported-by: kernel test robot Fixes: ab21d6063c01 ("bpf: Introduce 8-byte BTF set") Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20220722113605.6513-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 3cb0741e71d7..2aea877d644f 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -206,7 +206,7 @@ extern struct btf_id_set8 name; #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) -#define BTF_ID_FLAGS(prefix, name, flags) +#define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED #define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; @@ -215,7 +215,7 @@ extern struct btf_id_set8 name; #define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; -#define BTF_SET8_END(name) static struct btf_id_set8 __maybe_unused name = { 0 }; +#define BTF_SET8_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ -- cgit v1.2.3 From f96f644ab97abeed3f7007c953836a574ce928cc Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:23 -0700 Subject: ftrace: Add modify_ftrace_direct_multi_nolock This is similar to modify_ftrace_direct_multi, but does not acquire direct_mutex. This is useful when direct_mutex is already locked by the user. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/bpf/20220720002126.803253-2-song@kernel.org --- include/linux/ftrace.h | 5 +++ kernel/trace/ftrace.c | 86 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 67 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 979f6bfa2c25..acb35243ce5d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -340,6 +340,7 @@ unsigned long ftrace_find_rec_direct(unsigned long ip); int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr); #else struct ftrace_ops; @@ -384,6 +385,10 @@ static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned lo { return -ENODEV; } +static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 601ccf1b2f09..5d67dc12231d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5691,22 +5691,8 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); -/** - * modify_ftrace_direct_multi - Modify an existing direct 'multi' call - * to call something else - * @ops: The address of the struct ftrace_ops object - * @addr: The address of the new trampoline to call at @ops functions - * - * This is used to unregister currently registered direct caller and - * register new one @addr on functions registered in @ops object. - * - * Note there's window between ftrace_shutdown and ftrace_startup calls - * where there will be no callbacks called. - * - * Returns: zero on success. Non zero on error, which includes: - * -EINVAL - The @ops object was not properly registered. - */ -int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +static int +__modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) { struct ftrace_hash *hash; struct ftrace_func_entry *entry, *iter; @@ -5717,12 +5703,7 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) int i, size; int err; - if (check_direct_multi(ops)) - return -EINVAL; - if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) - return -EINVAL; - - mutex_lock(&direct_mutex); + lockdep_assert_held_once(&direct_mutex); /* Enable the tmp_ops to have the same functions as the direct ops */ ftrace_ops_init(&tmp_ops); @@ -5730,7 +5711,7 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) err = register_ftrace_function(&tmp_ops); if (err) - goto out_direct; + return err; /* * Now the ftrace_ops_list_func() is called to do the direct callers. @@ -5754,7 +5735,64 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) /* Removing the tmp_ops will add the updated direct callers to the functions */ unregister_ftrace_function(&tmp_ops); - out_direct: + return err; +} + +/** + * modify_ftrace_direct_multi_nolock - Modify an existing direct 'multi' call + * to call something else + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the new trampoline to call at @ops functions + * + * This is used to unregister currently registered direct caller and + * register new one @addr on functions registered in @ops object. + * + * Note there's window between ftrace_shutdown and ftrace_startup calls + * where there will be no callbacks called. + * + * Caller should already have direct_mutex locked, so we don't lock + * direct_mutex here. + * + * Returns: zero on success. Non zero on error, which includes: + * -EINVAL - The @ops object was not properly registered. + */ +int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +{ + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + return __modify_ftrace_direct_multi(ops, addr); +} +EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock); + +/** + * modify_ftrace_direct_multi - Modify an existing direct 'multi' call + * to call something else + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the new trampoline to call at @ops functions + * + * This is used to unregister currently registered direct caller and + * register new one @addr on functions registered in @ops object. + * + * Note there's window between ftrace_shutdown and ftrace_startup calls + * where there will be no callbacks called. + * + * Returns: zero on success. Non zero on error, which includes: + * -EINVAL - The @ops object was not properly registered. + */ +int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + int err; + + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + mutex_lock(&direct_mutex); + err = __modify_ftrace_direct_multi(ops, addr); mutex_unlock(&direct_mutex); return err; } -- cgit v1.2.3 From 53cd885bc5c3ea283cc9c00ca6446c778f00bfba Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:24 -0700 Subject: ftrace: Allow IPMODIFY and DIRECT ops on the same function IPMODIFY (livepatch) and DIRECT (bpf trampoline) ops are both important users of ftrace. It is necessary to allow them work on the same function at the same time. First, DIRECT ops no longer specify IPMODIFY flag. Instead, DIRECT flag is handled together with IPMODIFY flag in __ftrace_hash_update_ipmodify(). Then, a callback function, ops_func, is added to ftrace_ops. This is used by ftrace core code to understand whether the DIRECT ops can share with an IPMODIFY ops. To share with IPMODIFY ops, the DIRECT ops need to implement the callback function and adjust the direct trampoline accordingly. If DIRECT ops is attached before the IPMODIFY ops, ftrace core code calls ENABLE_SHARE_IPMODIFY_PEER on the DIRECT ops before registering the IPMODIFY ops. If IPMODIFY ops is attached before the DIRECT ops, ftrace core code calls ENABLE_SHARE_IPMODIFY_SELF in __ftrace_hash_update_ipmodify. Owner of the DIRECT ops may return 0 if the DIRECT trampoline can share with IPMODIFY, so error code otherwise. The error code is propagated to register_ftrace_direct_multi so that onwer of the DIRECT trampoline can handle it properly. For more details, please refer to comment before enum ftrace_ops_cmd. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/ Link: https://lore.kernel.org/all/20220718055449.3960512-1-song@kernel.org/ Link: https://lore.kernel.org/bpf/20220720002126.803253-3-song@kernel.org --- include/linux/ftrace.h | 38 ++++++++ kernel/trace/ftrace.c | 242 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 254 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index acb35243ce5d..0b61371e287b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -208,6 +208,43 @@ enum { FTRACE_OPS_FL_DIRECT = BIT(17), }; +/* + * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes + * to a ftrace_ops. Note, the requests may fail. + * + * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the DIRECT ops is being registered. + * This is called with both direct_mutex and + * ftrace_lock are locked. + * + * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being registered. + * This is called with direct_mutex locked. + * + * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same + * function as an ops with IPMODIFY. Called + * when the other ops (the one with IPMODIFY) + * is being unregistered. + * This is called with direct_mutex locked. + */ +enum ftrace_ops_cmd { + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF, + FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER, + FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER, +}; + +/* + * For most ftrace_ops_cmd, + * Returns: + * 0 - Success. + * Negative on failure. The return value is dependent on the + * callback. + */ +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd); + #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { @@ -250,6 +287,7 @@ struct ftrace_ops { unsigned long trampoline; unsigned long trampoline_size; struct list_head list; + ftrace_ops_func_t ops_func; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5d67dc12231d..bc921a3f7ea8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1861,6 +1861,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ftrace_hash_rec_update_modify(ops, filter_hash, 1); } +static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip); + /* * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK * or no-needed to update, -EBUSY if it detects a conflict of the flag @@ -1869,6 +1871,13 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) * - If the hash is EMPTY_HASH, it hits nothing * - Anything else hits the recs which match the hash entries. + * + * DIRECT ops does not have IPMODIFY flag, but we still need to check it + * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call + * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with + * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate + * the return value to the caller and eventually to the owner of the DIRECT + * ops. */ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_hash *old_hash, @@ -1877,17 +1886,26 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_page *pg; struct dyn_ftrace *rec, *end = NULL; int in_old, in_new; + bool is_ipmodify, is_direct; /* Only update if the ops has been registered */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return 0; - if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY; + is_direct = ops->flags & FTRACE_OPS_FL_DIRECT; + + /* neither IPMODIFY nor DIRECT, skip */ + if (!is_ipmodify && !is_direct) + return 0; + + if (WARN_ON_ONCE(is_ipmodify && is_direct)) return 0; /* - * Since the IPMODIFY is a very address sensitive action, we do not - * allow ftrace_ops to set all functions to new hash. + * Since the IPMODIFY and DIRECT are very address sensitive + * actions, we do not allow ftrace_ops to set all functions to new + * hash. */ if (!new_hash || !old_hash) return -EINVAL; @@ -1905,12 +1923,32 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, continue; if (in_new) { - /* New entries must ensure no others are using it */ - if (rec->flags & FTRACE_FL_IPMODIFY) - goto rollback; - rec->flags |= FTRACE_FL_IPMODIFY; - } else /* Removed entry */ + if (rec->flags & FTRACE_FL_IPMODIFY) { + int ret; + + /* Cannot have two ipmodify on same rec */ + if (is_ipmodify) + goto rollback; + + FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT); + + /* + * Another ops with IPMODIFY is already + * attached. We are now attaching a direct + * ops. Run SHARE_IPMODIFY_SELF, to check + * whether sharing is supported. + */ + if (!ops->ops_func) + return -EBUSY; + ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF); + if (ret) + return ret; + } else if (is_ipmodify) { + rec->flags |= FTRACE_FL_IPMODIFY; + } + } else if (is_ipmodify) { rec->flags &= ~FTRACE_FL_IPMODIFY; + } } while_for_each_ftrace_rec(); return 0; @@ -2454,8 +2492,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops direct_ops = { .func = call_direct_funcs, - .flags = FTRACE_OPS_FL_IPMODIFY - | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS + .flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT, /* * By declaring the main trampoline as this trampoline @@ -3072,14 +3109,14 @@ static inline int ops_traces_mod(struct ftrace_ops *ops) } /* - * Check if the current ops references the record. + * Check if the current ops references the given ip. * * If the ops traces all functions, then it was already accounted for. * If the ops does not trace the current record function, skip it. * If the ops ignores the function via notrace filter, skip it. */ -static inline bool -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +static bool +ops_references_ip(struct ftrace_ops *ops, unsigned long ip) { /* If ops isn't enabled, ignore it */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) @@ -3091,16 +3128,29 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) /* The function must be in the filter */ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && - !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) + !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip)) return false; /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip)) return false; return true; } +/* + * Check if the current ops references the record. + * + * If the ops traces all functions, then it was already accounted for. + * If the ops does not trace the current record function, skip it. + * If the ops ignores the function via notrace filter, skip it. + */ +static bool +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) +{ + return ops_references_ip(ops, rec->ip); +} + static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { bool init_nop = ftrace_need_init_nop(); @@ -5215,6 +5265,8 @@ static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) return direct; } +static int register_ftrace_function_nolock(struct ftrace_ops *ops); + /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5286,7 +5338,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { - ret = register_ftrace_function(&direct_ops); + ret = register_ftrace_function_nolock(&direct_ops); if (ret) ftrace_set_filter_ip(&direct_ops, ip, 1, 0); } @@ -5545,8 +5597,7 @@ int modify_ftrace_direct(unsigned long ip, } EXPORT_SYMBOL_GPL(modify_ftrace_direct); -#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \ - FTRACE_OPS_FL_SAVE_REGS) +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) static int check_direct_multi(struct ftrace_ops *ops) { @@ -5639,7 +5690,7 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) ops->flags = MULTI_FLAGS; ops->trampoline = FTRACE_REGS_ADDR; - err = register_ftrace_function(ops); + err = register_ftrace_function_nolock(ops); out_remove: if (err) @@ -5709,7 +5760,7 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) ftrace_ops_init(&tmp_ops); tmp_ops.func_hash = ops->func_hash; - err = register_ftrace_function(&tmp_ops); + err = register_ftrace_function_nolock(&tmp_ops); if (err) return err; @@ -8003,6 +8054,143 @@ int ftrace_is_dead(void) return ftrace_disabled; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When registering ftrace_ops with IPMODIFY, it is necessary to make sure + * it doesn't conflict with any direct ftrace_ops. If there is existing + * direct ftrace_ops on a kernel function being patched, call + * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing. + * + * @ops: ftrace_ops being registered. + * + * Returns: + * 0 on success; + * Negative on failure. + */ +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i, ret; + + lockdep_assert_held_once(&direct_mutex); + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return 0; + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + if (found_op) { + if (!op->ops_func) + return -EBUSY; + + ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER); + if (ret) + return ret; + } + } + } + + return 0; +} + +/* + * Similar to prepare_direct_functions_for_ipmodify, clean up after ops + * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT + * ops. + */ +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ + struct ftrace_func_entry *entry; + struct ftrace_hash *hash; + struct ftrace_ops *op; + int size, i; + + if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) + return; + + mutex_lock(&direct_mutex); + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + unsigned long ip = entry->ip; + bool found_op = false; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!(op->flags & FTRACE_OPS_FL_DIRECT)) + continue; + if (ops_references_ip(op, ip)) { + found_op = true; + break; + } + } while_for_each_ftrace_op(op); + mutex_unlock(&ftrace_lock); + + /* The cleanup is optional, ignore any errors */ + if (found_op && op->ops_func) + op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER); + } + } + mutex_unlock(&direct_mutex); +} + +#define lock_direct_mutex() mutex_lock(&direct_mutex) +#define unlock_direct_mutex() mutex_unlock(&direct_mutex) + +#else /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops) +{ + return 0; +} + +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops) +{ +} + +#define lock_direct_mutex() do { } while (0) +#define unlock_direct_mutex() do { } while (0) + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +/* + * Similar to register_ftrace_function, except we don't lock direct_mutex. + */ +static int register_ftrace_function_nolock(struct ftrace_ops *ops) +{ + int ret; + + ftrace_ops_init(ops); + + mutex_lock(&ftrace_lock); + + ret = ftrace_startup(ops, 0); + + mutex_unlock(&ftrace_lock); + + return ret; +} + /** * register_ftrace_function - register a function for profiling * @ops: ops structure that holds the function for profiling. @@ -8018,14 +8206,15 @@ int register_ftrace_function(struct ftrace_ops *ops) { int ret; - ftrace_ops_init(ops); - - mutex_lock(&ftrace_lock); - - ret = ftrace_startup(ops, 0); + lock_direct_mutex(); + ret = prepare_direct_functions_for_ipmodify(ops); + if (ret < 0) + goto out_unlock; - mutex_unlock(&ftrace_lock); + ret = register_ftrace_function_nolock(ops); +out_unlock: + unlock_direct_mutex(); return ret; } EXPORT_SYMBOL_GPL(register_ftrace_function); @@ -8044,6 +8233,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) ret = ftrace_shutdown(ops, 0); mutex_unlock(&ftrace_lock); + cleanup_direct_functions_after_ipmodify(ops); return ret; } EXPORT_SYMBOL_GPL(unregister_ftrace_function); -- cgit v1.2.3 From 316cba62dfb7878b7353177e6a7da9cc0c979cde Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 19 Jul 2022 17:21:25 -0700 Subject: bpf, x64: Allow to use caller address from stack Currently we call the original function by using the absolute address given at the JIT generation. That's not usable when having trampoline attached to multiple functions, or the target address changes dynamically (in case of live patch). In such cases we need to take the return address from the stack. Adding support to retrieve the original function address from the stack by adding new BPF_TRAMP_F_ORIG_STACK flag for arch_prepare_bpf_trampoline function. Basically we take the return address of the 'fentry' call: function + 0: call fentry # stores 'function + 5' address on stack function + 5: ... The 'function + 5' address will be used as the address for the original function to call. Signed-off-by: Jiri Olsa Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220720002126.803253-4-song@kernel.org --- arch/x86/net/bpf_jit_comp.c | 13 +++++++++---- include/linux/bpf.h | 5 +++++ 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 54c7f46c453f..e1b0c5ed0b7c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2119,10 +2119,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) { restore_regs(m, &prog, nr_args, regs_off); - /* call original function */ - if (emit_call(&prog, orig_call, prog)) { - ret = -EINVAL; - goto cleanup; + if (flags & BPF_TRAMP_F_ORIG_STACK) { + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); + EMIT2(0xff, 0xd0); /* call *rax */ + } else { + /* call original function */ + if (emit_call(&prog, orig_call, prog)) { + ret = -EINVAL; + goto cleanup; + } } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a97751d845c9..1d22df8bf306 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -751,6 +751,11 @@ struct btf_func_model { /* Return the return value of fentry prog. Only used by bpf_struct_ops. */ #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) +/* Get original function from stack instead of from provided direct address. + * Makes sense for trampolines with fexit or fmod_ret programs. + */ +#define BPF_TRAMP_F_ORIG_STACK BIT(5) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ -- cgit v1.2.3 From 00963a2e75a872e5fce4d0115ac2786ec86b57a6 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Jul 2022 17:21:26 -0700 Subject: bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch) When tracing a function with IPMODIFY ftrace_ops (livepatch), the bpf trampoline must follow the instruction pointer saved on stack. This needs extra handling for bpf trampolines with BPF_TRAMP_F_CALL_ORIG flag. Implement bpf_tramp_ftrace_ops_func and use it for the ftrace_ops used by BPF trampoline. This enables tracing functions with livepatch. This also requires moving bpf trampoline to *_ftrace_direct_mult APIs. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/all/20220602193706.2607681-2-song@kernel.org/ Link: https://lore.kernel.org/bpf/20220720002126.803253-5-song@kernel.org --- include/linux/bpf.h | 8 +++ kernel/bpf/trampoline.c | 158 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 149 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1d22df8bf306..20c26aed7896 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -47,6 +47,7 @@ struct kobject; struct mem_cgroup; struct module; struct bpf_func_state; +struct ftrace_ops; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -756,6 +757,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_ORIG_STACK BIT(5) +/* This trampoline is on a function with another ftrace_ops with IPMODIFY, + * e.g., a live patch. This flag is set and cleared by ftrace call backs, + */ +#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -838,9 +844,11 @@ struct bpf_tramp_image { struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; + struct ftrace_ops *fops; /* serializes access to fields of this trampoline */ struct mutex mutex; refcount_t refcnt; + u32 flags; u64 key; struct { struct btf_func_model model; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 6691dbf9e467..42e387a12694 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -13,6 +13,7 @@ #include #include #include +#include /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -29,6 +30,81 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; /* serializes access to trampoline_table */ static DEFINE_MUTEX(trampoline_mutex); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex); + +static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd) +{ + struct bpf_trampoline *tr = ops->private; + int ret = 0; + + if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) { + /* This is called inside register_ftrace_direct_multi(), so + * tr->mutex is already locked. + */ + lockdep_assert_held_once(&tr->mutex); + + /* Instead of updating the trampoline here, we propagate + * -EAGAIN to register_ftrace_direct_multi(). Then we can + * retry register_ftrace_direct_multi() after updating the + * trampoline. + */ + if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) && + !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) { + if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY)) + return -EBUSY; + + tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY; + return -EAGAIN; + } + + return 0; + } + + /* The normal locking order is + * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c) + * + * The following two commands are called from + * + * prepare_direct_functions_for_ipmodify + * cleanup_direct_functions_after_ipmodify + * + * In both cases, direct_mutex is already locked. Use + * mutex_trylock(&tr->mutex) to avoid deadlock in race condition + * (something else is making changes to this same trampoline). + */ + if (!mutex_trylock(&tr->mutex)) { + /* sleep 1 ms to make sure whatever holding tr->mutex makes + * some progress. + */ + msleep(1); + return -EAGAIN; + } + + switch (cmd) { + case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER: + tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY; + + if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) && + !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) + ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */); + break; + case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER: + tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY; + + if (tr->flags & BPF_TRAMP_F_ORIG_STACK) + ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */); + break; + default: + ret = -EINVAL; + break; + }; + + mutex_unlock(&tr->mutex); + return ret; +} +#endif + bool bpf_prog_has_trampoline(const struct bpf_prog *prog) { enum bpf_attach_type eatype = prog->expected_attach_type; @@ -89,6 +165,16 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) tr = kzalloc(sizeof(*tr), GFP_KERNEL); if (!tr) goto out; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL); + if (!tr->fops) { + kfree(tr); + tr = NULL; + goto out; + } + tr->fops->private = tr; + tr->fops->ops_func = bpf_tramp_ftrace_ops_func; +#endif tr->key = key; INIT_HLIST_NODE(&tr->hlist); @@ -128,7 +214,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) int ret; if (tr->func.ftrace_managed) - ret = unregister_ftrace_direct((long)ip, (long)old_addr); + ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr); else ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); @@ -137,15 +223,20 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) return ret; } -static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr) +static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr, + bool lock_direct_mutex) { void *ip = tr->func.addr; int ret; - if (tr->func.ftrace_managed) - ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr); - else + if (tr->func.ftrace_managed) { + if (lock_direct_mutex) + ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr); + else + ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr); + } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr); + } return ret; } @@ -163,10 +254,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) if (bpf_trampoline_module_get(tr)) return -ENOENT; - if (tr->func.ftrace_managed) - ret = register_ftrace_direct((long)ip, (long)new_addr); - else + if (tr->func.ftrace_managed) { + ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0); + ret = register_ftrace_direct_multi(tr->fops, (long)new_addr); + } else { ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); + } if (ret) bpf_trampoline_module_put(tr); @@ -332,11 +425,11 @@ out: return ERR_PTR(err); } -static int bpf_trampoline_update(struct bpf_trampoline *tr) +static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex) { struct bpf_tramp_image *im; struct bpf_tramp_links *tlinks; - u32 flags = BPF_TRAMP_F_RESTORE_REGS; + u32 orig_flags = tr->flags; bool ip_arg = false; int err, total; @@ -358,18 +451,31 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } + /* clear all bits except SHARE_IPMODIFY */ + tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY; + if (tlinks[BPF_TRAMP_FEXIT].nr_links || - tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) + tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) { /* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME * should not be set together. */ - flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + } else { + tr->flags |= BPF_TRAMP_F_RESTORE_REGS; + } if (ip_arg) - flags |= BPF_TRAMP_F_IP_ARG; + tr->flags |= BPF_TRAMP_F_IP_ARG; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +again: + if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) && + (tr->flags & BPF_TRAMP_F_CALL_ORIG)) + tr->flags |= BPF_TRAMP_F_ORIG_STACK; +#endif err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, - &tr->func.model, flags, tlinks, + &tr->func.model, tr->flags, tlinks, tr->func.addr); if (err < 0) goto out; @@ -378,17 +484,34 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) WARN_ON(!tr->cur_image && tr->selector); if (tr->cur_image) /* progs already running at this address */ - err = modify_fentry(tr, tr->cur_image->image, im->image); + err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex); else /* first time registering */ err = register_fentry(tr, im->image); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + if (err == -EAGAIN) { + /* -EAGAIN from bpf_tramp_ftrace_ops_func. Now + * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the + * trampoline again, and retry register. + */ + /* reset fops->func and fops->trampoline for re-register */ + tr->fops->func = NULL; + tr->fops->trampoline = 0; + goto again; + } +#endif if (err) goto out; + if (tr->cur_image) bpf_tramp_image_put(tr->cur_image); tr->cur_image = im; tr->selector++; out: + /* If any error happens, restore previous flags */ + if (err) + tr->flags = orig_flags; kfree(tlinks); return err; } @@ -454,7 +577,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); tr->progs_cnt[kind]++; - err = bpf_trampoline_update(tr); + err = bpf_trampoline_update(tr, true /* lock_direct_mutex */); if (err) { hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; @@ -487,7 +610,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ } hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; - return bpf_trampoline_update(tr); + return bpf_trampoline_update(tr, true /* lock_direct_mutex */); } /* bpf_trampoline_unlink_prog() should never fail. */ @@ -715,6 +838,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) * multiple rcu callbacks. */ hlist_del(&tr->hlist); + kfree(tr->fops); kfree(tr); out: mutex_unlock(&trampoline_mutex); -- cgit v1.2.3 From 46126db9c86110e5fc1e369b9bb89735ddefdae4 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Mon, 18 Jul 2022 14:18:10 +0200 Subject: flow_dissector: Add PPPoE dissectors Allow to dissect PPPoE specific fields which are: - session ID (16 bits) - ppp protocol (16 bits) - type (16 bits) - this is PPPoE ethertype, for now only ETH_P_PPP_SES is supported, possible ETH_P_PPP_DISC in the future The goal is to make the following TC command possible: # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \ flower \ pppoe_sid 12 \ ppp_proto ip \ action drop Note that only PPPoE Session is supported. Signed-off-by: Wojciech Drewek Acked-by: Guillaume Nault Signed-off-by: Tony Nguyen --- include/linux/ppp_defs.h | 14 ++++++++++++ include/net/flow_dissector.h | 13 +++++++++++ net/core/flow_dissector.c | 53 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 73 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h index 9d2b388fae1a..b7e57fdbd413 100644 --- a/include/linux/ppp_defs.h +++ b/include/linux/ppp_defs.h @@ -11,4 +11,18 @@ #include #define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c) + +/** + * ppp_proto_is_valid - checks if PPP protocol is valid + * @proto: PPP protocol + * + * Assumes proto is not compressed. + * Protocol is valid if the value is odd and the least significant bit of the + * most significant octet is 0 (see RFC 1661, section 2). + */ +static inline bool ppp_proto_is_valid(u16 proto) +{ + return !!((proto & 0x0101) == 0x0001); +} + #endif /* _PPP_DEFS_H_ */ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 0f9544a9bb9e..6c74812d64b2 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -277,6 +277,18 @@ struct flow_dissector_key_num_of_vlans { u8 num_of_vlans; }; +/** + * struct flow_dissector_key_pppoe: + * @session_id: pppoe session id + * @ppp_proto: ppp protocol + * @type: pppoe eth type + */ +struct flow_dissector_key_pppoe { + __be16 session_id; + __be16 ppp_proto; + __be16 type; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -307,6 +319,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ + FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6aee04f75e3e..237d396b6e41 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -895,6 +895,11 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, return result == BPF_OK; } +static bool is_pppoe_ses_hdr_valid(struct pppoe_hdr hdr) +{ + return hdr.ver == 1 && hdr.type == 1 && hdr.code == 0; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @net: associated network namespace, derived from @skb if NULL @@ -1214,26 +1219,60 @@ proto_again: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; + u16 ppp_proto; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } - nhoff += PPPOE_SES_HLEN; - switch (hdr->proto) { - case htons(PPP_IP): + if (!is_pppoe_ses_hdr_valid(hdr->hdr)) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + /* least significant bit of the most significant octet + * indicates if protocol field was compressed + */ + ppp_proto = ntohs(hdr->proto); + if (ppp_proto & 0x0100) { + ppp_proto = ppp_proto >> 8; + nhoff += PPPOE_SES_HLEN - 1; + } else { + nhoff += PPPOE_SES_HLEN; + } + + if (ppp_proto == PPP_IP) { proto = htons(ETH_P_IP); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - case htons(PPP_IPV6): + } else if (ppp_proto == PPP_IPV6) { proto = htons(ETH_P_IPV6); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; - break; - default: + } else if (ppp_proto == PPP_MPLS_UC) { + proto = htons(ETH_P_MPLS_UC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto == PPP_MPLS_MC) { + proto = htons(ETH_P_MPLS_MC); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + } else if (ppp_proto_is_valid(ppp_proto)) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + } else { fdret = FLOW_DISSECT_RET_OUT_BAD; break; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE)) { + struct flow_dissector_key_pppoe *key_pppoe; + + key_pppoe = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PPPOE, + target_container); + key_pppoe->session_id = hdr->hdr.sid; + key_pppoe->ppp_proto = htons(ppp_proto); + key_pppoe->type = htons(ETH_P_PPP_SES); + } break; } case htons(ETH_P_TIPC): { -- cgit v1.2.3 From 5f10376b6bc1e2773f56977980ab08c9e4fa91a7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Jul 2022 14:56:52 -0700 Subject: add missing includes and forward declarations to networking includes under linux/ Similarly to a recent include/net/ cleanup, this patch adds missing includes to networking headers under include/linux. All these problems are currently masked by the existing users including the missing dependency before the broken header. Link: https://lore.kernel.org/all/20220723045755.2676857-1-kuba@kernel.org/ v1 Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20220726215652.158167-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/atm_tcp.h | 2 ++ include/linux/dsa/tag_qca.h | 5 +++++ include/linux/hippidevice.h | 4 ++++ include/linux/if_eql.h | 1 + include/linux/if_hsr.h | 4 ++++ include/linux/if_rmnet.h | 2 ++ include/linux/if_tap.h | 11 ++++++----- include/linux/mdio/mdio-xgene.h | 4 ++++ include/linux/nl802154.h | 2 ++ include/linux/phy_fixed.h | 3 +++ include/linux/ppp-comp.h | 2 +- include/linux/ppp_channel.h | 2 ++ include/linux/ptp_kvm.h | 2 ++ include/linux/ptp_pch.h | 4 ++++ include/linux/seq_file_net.h | 1 + include/linux/sungem_phy.h | 2 ++ include/linux/usb/usbnet.h | 6 ++++++ include/net/llc_s_st.h | 6 ++++++ 18 files changed, 57 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atm_tcp.h b/include/linux/atm_tcp.h index c8ecf6f68fb5..2558439d849b 100644 --- a/include/linux/atm_tcp.h +++ b/include/linux/atm_tcp.h @@ -9,6 +9,8 @@ #include +struct atm_vcc; +struct module; struct atm_tcp_ops { int (*attach)(struct atm_vcc *vcc,int itf); diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index 4359fb0221cf..50be7cbd93a5 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -3,6 +3,11 @@ #ifndef __TAG_QCA_H #define __TAG_QCA_H +#include + +struct dsa_switch; +struct sk_buff; + #define QCA_HDR_LEN 2 #define QCA_HDR_VERSION 0x2 diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9dc01f7ab5b4..07414c241e65 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -23,6 +23,10 @@ #ifdef __KERNEL__ +struct neigh_parms; +struct net_device; +struct sk_buff; + struct hippi_cb { __u32 ifield; }; diff --git a/include/linux/if_eql.h b/include/linux/if_eql.h index d75601d613cc..07f9b660b741 100644 --- a/include/linux/if_eql.h +++ b/include/linux/if_eql.h @@ -21,6 +21,7 @@ #include #include +#include #include typedef struct slave { diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 408539d5ea5f..0404f5bf4f30 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -2,6 +2,10 @@ #ifndef _LINUX_IF_HSR_H_ #define _LINUX_IF_HSR_H_ +#include + +struct net_device; + /* used to differentiate various protocols */ enum hsr_version { HSR_V0 = 0, diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 10e7521ecb6c..839d1e48b85e 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -5,6 +5,8 @@ #ifndef _LINUX_IF_RMNET_H_ #define _LINUX_IF_RMNET_H_ +#include + struct rmnet_map_header { u8 flags; /* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */ u8 mux_id; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 915a187cfabd..553552fa635c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -2,14 +2,18 @@ #ifndef _LINUX_IF_TAP_H_ #define _LINUX_IF_TAP_H_ +#include +#include + +struct file; +struct socket; + #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); struct ptr_ring *tap_get_ptr_ring(struct file *file); #else #include #include -struct file; -struct socket; static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); @@ -20,9 +24,6 @@ static inline struct ptr_ring *tap_get_ptr_ring(struct file *f) } #endif /* CONFIG_TAP */ -#include -#include - /* * Maximum times a tap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. diff --git a/include/linux/mdio/mdio-xgene.h b/include/linux/mdio/mdio-xgene.h index 8af93ada8b64..9e588965dc83 100644 --- a/include/linux/mdio/mdio-xgene.h +++ b/include/linux/mdio/mdio-xgene.h @@ -8,6 +8,10 @@ #ifndef __MDIO_XGENE_H__ #define __MDIO_XGENE_H__ +#include +#include +#include + #define BLOCK_XG_MDIO_CSR_OFFSET 0x5000 #define BLOCK_DIAG_CSR_OFFSET 0xd000 #define XGENET_CONFIG_REG_ADDR 0x20 diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index b22782225f27..cbe5fd1dd2e7 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -8,6 +8,8 @@ #ifndef NL802154_H #define NL802154_H +#include + #define IEEE802154_NL_NAME "802.15.4 MAC" #define IEEE802154_MCAST_COORD_NAME "coordinator" #define IEEE802154_MCAST_BEACON_NAME "beacon" diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 52bc8e487ef7..1acafd86ab13 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -2,6 +2,8 @@ #ifndef __PHY_FIXED_H #define __PHY_FIXED_H +#include + struct fixed_phy_status { int link; int speed; @@ -12,6 +14,7 @@ struct fixed_phy_status { struct device_node; struct gpio_desc; +struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); diff --git a/include/linux/ppp-comp.h b/include/linux/ppp-comp.h index 9d3ffc8f5ea6..fb847e47f148 100644 --- a/include/linux/ppp-comp.h +++ b/include/linux/ppp-comp.h @@ -9,7 +9,7 @@ #include - +struct compstat; struct module; /* diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index 91f9a928344e..45e6e427ceb8 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -20,6 +20,8 @@ #include #include +struct net_device_path; +struct net_device_path_ctx; struct ppp_channel; struct ppp_channel_ops { diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index f960a719f0d5..c2e28deef33a 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,6 +8,8 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include + struct timespec64; struct clocksource; diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h index 51818198c292..7ba643b62c15 100644 --- a/include/linux/ptp_pch.h +++ b/include/linux/ptp_pch.h @@ -10,6 +10,10 @@ #ifndef _PTP_PCH_H_ #define _PTP_PCH_H_ +#include + +struct pci_dev; + void pch_ch_control_write(struct pci_dev *pdev, u32 val); u32 pch_ch_event_read(struct pci_dev *pdev); void pch_ch_event_write(struct pci_dev *pdev, u32 val); diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h index b97912fdbae7..79638395bc32 100644 --- a/include/linux/seq_file_net.h +++ b/include/linux/seq_file_net.h @@ -3,6 +3,7 @@ #define __SEQ_FILE_NET_H__ #include +#include struct net; extern struct net init_net; diff --git a/include/linux/sungem_phy.h b/include/linux/sungem_phy.h index 3a11fa41a131..c505f30e8b68 100644 --- a/include/linux/sungem_phy.h +++ b/include/linux/sungem_phy.h @@ -2,6 +2,8 @@ #ifndef __SUNGEM_PHY_H__ #define __SUNGEM_PHY_H__ +#include + struct mii_phy; /* Operations supported by any kind of PHY */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 1b4d72d5e891..b42b72391a8d 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -23,6 +23,12 @@ #ifndef __LINUX_USB_USBNET_H #define __LINUX_USB_USBNET_H +#include +#include +#include +#include +#include + /* interface from usbnet core to each USB networking link we handle */ struct usbnet { /* housekeeping */ diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h index c4359e203013..ed5b2fa40d32 100644 --- a/include/net/llc_s_st.h +++ b/include/net/llc_s_st.h @@ -12,6 +12,12 @@ * See the GNU General Public License for more details. */ +#include +#include +#include + +struct llc_sap_state_trans; + #define LLC_NR_SAP_STATES 2 /* size of state table */ /* structures and types */ -- cgit v1.2.3 From 7fb48d25b5ce3bc488dbb019bf1736248181de9a Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 27 Jul 2022 19:16:34 +0900 Subject: can: dev: add generic function can_ethtool_op_get_ts_info_hwts() Add function can_ethtool_op_get_ts_info_hwts(). This function will be used by CAN devices with hardware TX/RX timestamping support to implement ethtool_ops::get_ts_info. This function does not offer support to activate/deactivate hardware timestamps at device level nor support the filter options (which is currently the case for all CAN devices with hardware timestamping support). The fact that hardware timestamp can not be deactivated at hardware level does not impact the userland. As long as the user do not set SO_TIMESTAMPING using a setsockopt() or ioctl(), the kernel will not emit TX timestamps (RX timestamps will still be reproted as it is the case currently). Drivers which need more fine grained control remains free to implement their own function, but we foresee that the generic function introduced here will be sufficient for the majority. Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/all/20220727101641.198847-8-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/dev.c | 21 +++++++++++++++++++++ include/linux/can/dev.h | 3 +++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 523eaacfe29e..f307034ff4fd 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -322,6 +322,27 @@ int can_change_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL_GPL(can_change_mtu); +/* generic implementation of ethtool_ops::get_ts_info for CAN devices + * supporting hardware timestamps + */ +int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, + struct ethtool_ts_info *info) +{ + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->phc_index = -1; + info->tx_types = BIT(HWTSTAMP_TX_ON); + info->rx_filters = BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} +EXPORT_SYMBOL(can_ethtool_op_get_ts_info_hwts); + /* Common open function when the device gets opened. * * This function should be called in the open function of the device diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index e22dc03c850e..752bd45d8ebf 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -20,6 +20,7 @@ #include #include #include +#include #include /* @@ -162,6 +163,8 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); +int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, + struct ethtool_ts_info *info); int register_candev(struct net_device *dev); void unregister_candev(struct net_device *dev); -- cgit v1.2.3 From 90f942c5a6d775bad1be33ba214755314105da4a Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 27 Jul 2022 19:16:35 +0900 Subject: can: dev: add generic function can_eth_ioctl_hwts() Tools based on libpcap (such as tcpdump) expect the SIOCSHWTSTAMP ioctl call to be supported. This is also specified in the kernel doc [1]. The purpose of this ioctl is to toggle the hardware timestamps. Currently, CAN devices which support hardware timestamping have those always activated. can_eth_ioctl_hwts() is a dumb function that will always succeed when requested to set tx_type to HWTSTAMP_TX_ON or rx_filter to HWTSTAMP_FILTER_ALL. [1] Kernel doc: Timestamping, section 3.1 "Hardware Timestamping Implementation: Device Drivers" Link: https://docs.kernel.org/networking/timestamping.html#hardware-timestamping-implementation-device-drivers Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/all/20220727101641.198847-9-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/dev.c | 29 +++++++++++++++++++++++++++++ include/linux/can/dev.h | 1 + 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index f307034ff4fd..c1956b1e9faf 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -322,6 +322,35 @@ int can_change_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL_GPL(can_change_mtu); +/* generic implementation of netdev_ops::ndo_eth_ioctl for CAN devices + * supporting hardware timestamps + */ +int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + struct hwtstamp_config hwts_cfg = { 0 }; + + switch (cmd) { + case SIOCSHWTSTAMP: /* set */ + if (copy_from_user(&hwts_cfg, ifr->ifr_data, sizeof(hwts_cfg))) + return -EFAULT; + if (hwts_cfg.tx_type == HWTSTAMP_TX_ON && + hwts_cfg.rx_filter == HWTSTAMP_FILTER_ALL) + return 0; + return -ERANGE; + + case SIOCGHWTSTAMP: /* get */ + hwts_cfg.tx_type = HWTSTAMP_TX_ON; + hwts_cfg.rx_filter = HWTSTAMP_FILTER_ALL; + if (copy_to_user(ifr->ifr_data, &hwts_cfg, sizeof(hwts_cfg))) + return -EFAULT; + return 0; + + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL(can_eth_ioctl_hwts); + /* generic implementation of ethtool_ops::get_ts_info for CAN devices * supporting hardware timestamps */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 752bd45d8ebf..c3e50e537e39 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -163,6 +163,7 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); +int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, struct ethtool_ts_info *info); -- cgit v1.2.3