summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/9p/client.h2
-rw-r--r--include/net/Space.h1
-rw-r--r--include/net/act_api.h8
-rw-r--r--include/net/addrconf.h23
-rw-r--r--include/net/af_rxrpc.h15
-rw-r--r--include/net/af_unix.h63
-rw-r--r--include/net/af_vsock.h12
-rw-r--r--include/net/ax25.h12
-rw-r--r--include/net/bluetooth/bluetooth.h17
-rw-r--r--include/net/bluetooth/hci.h78
-rw-r--r--include/net/bluetooth/hci_core.h258
-rw-r--r--include/net/bluetooth/hci_sock.h2
-rw-r--r--include/net/bluetooth/hci_sync.h29
-rw-r--r--include/net/bluetooth/l2cap.h33
-rw-r--r--include/net/bluetooth/rfcomm.h2
-rw-r--r--include/net/bond_3ad.h7
-rw-r--r--include/net/bond_alb.h2
-rw-r--r--include/net/bond_options.h1
-rw-r--r--include/net/bonding.h23
-rw-r--r--include/net/busy_poll.h6
-rw-r--r--include/net/caif/caif_layer.h6
-rw-r--r--include/net/caif/cfpkt.h2
-rw-r--r--include/net/calipso.h2
-rw-r--r--include/net/cfg80211.h1009
-rw-r--r--include/net/cfg802154.h72
-rw-r--r--include/net/checksum.h2
-rw-r--r--include/net/cipso_ipv4.h8
-rw-r--r--include/net/devlink.h94
-rw-r--r--include/net/dropreason-core.h83
-rw-r--r--include/net/dsa.h137
-rw-r--r--include/net/dsa_stubs.h22
-rw-r--r--include/net/dscp.h76
-rw-r--r--include/net/dst.h23
-rw-r--r--include/net/dst_cache.h4
-rw-r--r--include/net/dst_metadata.h17
-rw-r--r--include/net/eee.h38
-rw-r--r--include/net/erspan.h4
-rw-r--r--include/net/espintcp.h2
-rw-r--r--include/net/fib_rules.h3
-rw-r--r--include/net/flow_dissector.h25
-rw-r--r--include/net/flow_offload.h94
-rw-r--r--include/net/genetlink.h71
-rw-r--r--include/net/gre.h70
-rw-r--r--include/net/gro.h129
-rw-r--r--include/net/gtp.h5
-rw-r--r--include/net/hotdata.h55
-rw-r--r--include/net/hwbm.h4
-rw-r--r--include/net/ieee80211_radiotap.h9
-rw-r--r--include/net/ieee802154_netdev.h60
-rw-r--r--include/net/ieee8021q.h57
-rw-r--r--include/net/if_inet6.h6
-rw-r--r--include/net/inet6_hashtables.h14
-rw-r--r--include/net/inet_common.h4
-rw-r--r--include/net/inet_connection_sock.h31
-rw-r--r--include/net/inet_frag.h4
-rw-r--r--include/net/inet_hashtables.h31
-rw-r--r--include/net/inet_sock.h20
-rw-r--r--include/net/inet_timewait_sock.h20
-rw-r--r--include/net/inetpeer.h12
-rw-r--r--include/net/ioam6.h4
-rw-r--r--include/net/ip.h57
-rw-r--r--include/net/ip6_fib.h54
-rw-r--r--include/net/ip6_route.h24
-rw-r--r--include/net/ip6_tunnel.h4
-rw-r--r--include/net/ip_fib.h38
-rw-r--r--include/net/ip_tunnels.h155
-rw-r--r--include/net/ipv6.h78
-rw-r--r--include/net/ipv6_stubs.h8
-rw-r--r--include/net/iucv/iucv.h16
-rw-r--r--include/net/iw_handler.h12
-rw-r--r--include/net/l3mdev.h2
-rw-r--r--include/net/lib80211.h8
-rw-r--r--include/net/libeth/cache.h66
-rw-r--r--include/net/libeth/rx.h261
-rw-r--r--include/net/libeth/tx.h129
-rw-r--r--include/net/libeth/types.h25
-rw-r--r--include/net/llc_c_st.h4
-rw-r--r--include/net/llc_pdu.h2
-rw-r--r--include/net/llc_s_st.h4
-rw-r--r--include/net/mac80211.h542
-rw-r--r--include/net/mac802154.h6
-rw-r--r--include/net/macsec.h55
-rw-r--r--include/net/mana/gdma.h26
-rw-r--r--include/net/mana/hw_channel.h2
-rw-r--r--include/net/mana/mana.h83
-rw-r--r--include/net/mctp.h6
-rw-r--r--include/net/mptcp.h7
-rw-r--r--include/net/ndisc.h17
-rw-r--r--include/net/neighbour.h6
-rw-r--r--include/net/net_namespace.h24
-rw-r--r--include/net/netdev_queues.h125
-rw-r--r--include/net/netdev_rx_queue.h11
-rw-r--r--include/net/netfilter/nf_conntrack.h29
-rw-r--r--include/net/netfilter/nf_conntrack_count.h6
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h2
-rw-r--r--include/net/netfilter/nf_flow_table.h26
-rw-r--r--include/net/netfilter/nf_hooks_lwtunnel.h2
-rw-r--r--include/net/netfilter/nf_queue.h1
-rw-r--r--include/net/netfilter/nf_tables.h366
-rw-r--r--include/net/netfilter/nf_tables_core.h2
-rw-r--r--include/net/netfilter/nf_tproxy.h1
-rw-r--r--include/net/netfilter/nft_fib.h34
-rw-r--r--include/net/netfilter/nft_meta.h3
-rw-r--r--include/net/netfilter/nft_reject.h3
-rw-r--r--include/net/netkit.h44
-rw-r--r--include/net/netlabel.h37
-rw-r--r--include/net/netlink.h182
-rw-r--r--include/net/netmem.h174
-rw-r--r--include/net/netns/conntrack.h2
-rw-r--r--include/net/netns/core.h1
-rw-r--r--include/net/netns/ipv4.h67
-rw-r--r--include/net/netns/sctp.h4
-rw-r--r--include/net/netns/smc.h2
-rw-r--r--include/net/netns/xfrm.h3
-rw-r--r--include/net/nexthop.h48
-rw-r--r--include/net/nfc/nci.h2
-rw-r--r--include/net/nfc/nfc.h10
-rw-r--r--include/net/nl802154.h24
-rw-r--r--include/net/page_pool/helpers.h359
-rw-r--r--include/net/page_pool/types.h126
-rw-r--r--include/net/pfcp.h90
-rw-r--r--include/net/pkt_cls.h22
-rw-r--r--include/net/pkt_sched.h28
-rw-r--r--include/net/proto_memory.h83
-rw-r--r--include/net/protocol.h3
-rw-r--r--include/net/psample.h13
-rw-r--r--include/net/red.h20
-rw-r--r--include/net/regulatory.h5
-rw-r--r--include/net/request_sock.h80
-rw-r--r--include/net/route.h54
-rw-r--r--include/net/rps.h153
-rw-r--r--include/net/rstreason.h221
-rw-r--r--include/net/rtnetlink.h1
-rw-r--r--include/net/sch_generic.h66
-rw-r--r--include/net/scm.h20
-rw-r--r--include/net/sctp/sctp.h2
-rw-r--r--include/net/sctp/stream_sched.h8
-rw-r--r--include/net/sctp/structs.h28
-rw-r--r--include/net/seg6.h7
-rw-r--r--include/net/seg6_hmac.h7
-rw-r--r--include/net/seg6_local.h1
-rw-r--r--include/net/smc.h8
-rw-r--r--include/net/sock.h368
-rw-r--r--include/net/sock_reuseport.h2
-rw-r--r--include/net/strparser.h2
-rw-r--r--include/net/tc_act/tc_ct.h1
-rw-r--r--include/net/tc_act/tc_ipt.h17
-rw-r--r--include/net/tc_act/tc_mirred.h1
-rw-r--r--include/net/tc_wrapper.h6
-rw-r--r--include/net/tcp.h474
-rw-r--r--include/net/tcp_ao.h357
-rw-r--r--include/net/tcp_states.h2
-rw-r--r--include/net/tcx.h12
-rw-r--r--include/net/timewait_sock.h9
-rw-r--r--include/net/tls.h23
-rw-r--r--include/net/udp.h25
-rw-r--r--include/net/udp_tunnel.h25
-rw-r--r--include/net/vxlan.h33
-rw-r--r--include/net/x25.h2
-rw-r--r--include/net/xdp.h41
-rw-r--r--include/net/xdp_sock.h126
-rw-r--r--include/net/xdp_sock_drv.h58
-rw-r--r--include/net/xfrm.h130
-rw-r--r--include/net/xsk_buff_pool.h26
164 files changed, 6968 insertions, 2032 deletions
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 78ebcf782ce5..4f785098c67a 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -207,6 +207,8 @@ int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err
int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
int *err);
int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err);
+struct netfs_io_subrequest;
+void p9_client_write_subreq(struct netfs_io_subrequest *subreq);
int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset);
int p9dirent_read(struct p9_client *clnt, char *buf, int len,
struct p9_dirent *dirent);
diff --git a/include/net/Space.h b/include/net/Space.h
index c29f3d51c078..ef42629f4258 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -10,4 +10,3 @@ struct net_device *smc_init(int unit);
struct net_device *cs89x0_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
-struct net_device *cops_probe(int unit);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 4ae0580b63ca..77ee0c657e2c 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -137,6 +137,7 @@ struct tc_action_ops {
#ifdef CONFIG_NET_CLS_ACT
+#define ACT_P_BOUND 0
#define ACT_P_CREATED 1
#define ACT_P_DELETED 1
@@ -191,7 +192,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int bind,
u32 flags);
-void tcf_idr_insert_many(struct tc_action *actions[]);
+void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]);
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
@@ -200,6 +201,8 @@ int tcf_idr_release(struct tc_action *a, bool bind);
int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
int tcf_unregister_action(struct tc_action_ops *a,
struct pernet_operations *ops);
+#define NET_ACT_ALIAS_PREFIX "net-act-"
+#define MODULE_ALIAS_NET_ACT(kind) MODULE_ALIAS(NET_ACT_ALIAS_PREFIX kind)
int tcf_action_destroy(struct tc_action *actions[], int bind);
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
@@ -207,8 +210,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est,
struct tc_action *actions[], int init_res[], size_t *attr_size,
u32 flags, u32 fl_flags, struct netlink_ext_ack *extack);
-struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
- bool rtnl_held,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index facb7a469efa..363dd63babe7 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -8,8 +8,9 @@
#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
-#define TEMP_VALID_LIFETIME (7*86400)
-#define TEMP_PREFERRED_LIFETIME (86400)
+#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */
+#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */
+#define REGEN_MIN_ADVANCE (2) /* 2 seconds */
#define REGEN_MAX_RETRY (3)
#define MAX_DESYNC_FACTOR (600)
@@ -36,10 +37,14 @@ struct prefix_info {
struct __packed {
#if defined(__BIG_ENDIAN_BITFIELD)
__u8 onlink : 1,
- autoconf : 1,
- reserved : 6;
+ autoconf : 1,
+ routeraddr : 1,
+ preferpd : 1,
+ reserved : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 reserved : 6,
+ __u8 reserved : 4,
+ preferpd : 1,
+ routeraddr : 1,
autoconf : 1,
onlink : 1;
#else
@@ -182,10 +187,12 @@ static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
return 0;
}
+#define INFINITY_LIFE_TIME 0xFFFFFFFF
+
static inline unsigned long addrconf_timeout_fixup(u32 timeout,
unsigned int unit)
{
- if (timeout == 0xffffffff)
+ if (timeout == INFINITY_LIFE_TIME)
return ~0UL;
/*
@@ -326,7 +333,7 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
/**
* __in6_dev_stats_get - get inet6_dev pointer for stats
* @dev: network device
- * @skb: skb for original incoming interface if neeeded
+ * @skb: skb for original incoming interface if needed
*
* Caller must hold rcu_read_lock or RTNL, because this function
* does not take a reference on the inet6_dev.
@@ -416,7 +423,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
if (unlikely(!idev))
return true;
- return !!idev->cnf.ignore_routes_with_linkdown;
+ return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown);
}
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 5531dd08061e..0754c463224a 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -15,6 +15,7 @@ struct key;
struct sock;
struct socket;
struct rxrpc_call;
+struct rxrpc_peer;
enum rxrpc_abort_reason;
enum rxrpc_interruptibility {
@@ -41,13 +42,14 @@ void rxrpc_kernel_new_call_notification(struct socket *,
rxrpc_notify_new_call_t,
rxrpc_discard_new_call_t);
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
- struct sockaddr_rxrpc *srx,
+ struct rxrpc_peer *peer,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
+ u16 service_id,
bool upgrade,
enum rxrpc_interruptibility interruptibility,
unsigned int debug_id);
@@ -60,9 +62,14 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, enum rxrpc_abort_reason);
void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call);
void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call);
-void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
- struct sockaddr_rxrpc *);
-bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
+struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
+void rxrpc_kernel_put_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call);
+const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer);
+const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer);
+unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 77bf30203d3c..63129c79b8cb 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -8,21 +8,46 @@
#include <linux/refcount.h>
#include <net/sock.h>
-void unix_inflight(struct user_struct *user, struct file *fp);
-void unix_notinflight(struct user_struct *user, struct file *fp);
-void unix_destruct_scm(struct sk_buff *skb);
-void io_uring_destruct_scm(struct sk_buff *skb);
+#if IS_ENABLED(CONFIG_UNIX)
+struct unix_sock *unix_get_socket(struct file *filp);
+#else
+static inline struct unix_sock *unix_get_socket(struct file *filp)
+{
+ return NULL;
+}
+#endif
+
+extern unsigned int unix_tot_inflight;
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
+void unix_del_edges(struct scm_fp_list *fpl);
+void unix_update_edges(struct unix_sock *receiver);
+int unix_prepare_fpl(struct scm_fp_list *fpl);
+void unix_destroy_fpl(struct scm_fp_list *fpl);
void unix_gc(void);
-void wait_for_unix_gc(void);
-struct sock *unix_get_socket(struct file *filp);
+void wait_for_unix_gc(struct scm_fp_list *fpl);
+
+struct unix_vertex {
+ struct list_head edges;
+ struct list_head entry;
+ struct list_head scc_entry;
+ unsigned long out_degree;
+ unsigned long index;
+ unsigned long scc_index;
+};
+
+struct unix_edge {
+ struct unix_sock *predecessor;
+ struct unix_sock *successor;
+ struct list_head vertex_entry;
+ struct list_head stack_entry;
+};
+
struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_MOD (256 - 1)
#define UNIX_HASH_SIZE (256 * 2)
#define UNIX_HASH_BITS 8
-extern unsigned int unix_tot_inflight;
-
struct unix_address {
refcount_t refcnt;
int len;
@@ -42,6 +67,7 @@ struct unix_skb_parms {
struct scm_stat {
atomic_t nr_fds;
+ unsigned long nr_unix_fds;
};
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
@@ -54,12 +80,9 @@ struct unix_sock {
struct path path;
struct mutex iolock, bindlock;
struct sock *peer;
- struct list_head link;
- unsigned long inflight;
+ struct sock *listener;
+ struct unix_vertex *vertex;
spinlock_t lock;
- unsigned long gc_flags;
-#define UNIX_GC_CANDIDATE 0
-#define UNIX_GC_MAYBE_CYCLE 1
struct socket_wq peer_wq;
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
@@ -73,20 +96,6 @@ struct unix_sock {
#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-enum unix_socket_lock_class {
- U_LOCK_NORMAL,
- U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */
- U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
- U_LOCK_GC_LISTENER, /* used for listening socket while determining gc
- * candidates to close a small race window.
- */
-};
-
-static inline void unix_state_lock_nested(struct sock *sk,
- enum unix_socket_lock_class subclass)
-{
- spin_lock_nested(&unix_sk(sk)->lock, subclass);
-}
#define peer_wait peer_wq.wait
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f8b09a82f62e..70302c92d329 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -169,6 +169,9 @@ struct vsock_transport {
void (*notify_buffer_size)(struct vsock_sock *, u64 *);
int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);
+ /* SIOCOUTQ ioctl */
+ ssize_t (*unsent_bytes)(struct vsock_sock *vsk);
+
/* Shutdown. */
int (*shutdown)(struct vsock_sock *, int);
@@ -177,6 +180,9 @@ struct vsock_transport {
/* Read a single skb */
int (*read_skb)(struct vsock_sock *, skb_read_actor_t);
+
+ /* Zero-copy. */
+ bool (*msgzerocopy_allow)(void);
};
/**** CORE ****/
@@ -236,8 +242,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags);
-#ifdef CONFIG_BPF_SYSCALL
extern struct proto vsock_proto;
+#ifdef CONFIG_BPF_SYSCALL
int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init vsock_bpf_build_proto(void);
#else
@@ -245,4 +251,8 @@ static inline void __init vsock_bpf_build_proto(void)
{}
#endif
+static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
+{
+ return t->msgzerocopy_allow && t->msgzerocopy_allow();
+}
#endif /* __AF_VSOCK_H__ */
diff --git a/include/net/ax25.h b/include/net/ax25.h
index c2a85fd3f5ea..4ee141aae0a2 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -139,7 +139,9 @@ enum {
AX25_VALUES_N2, /* Default N2 value */
AX25_VALUES_PACLEN, /* AX.25 MTU */
AX25_VALUES_PROTOCOL, /* Std AX.25, DAMA Slave, DAMA Master */
+#ifdef CONFIG_AX25_DAMA_SLAVE
AX25_VALUES_DS_TIMEOUT, /* DAMA Slave timeout */
+#endif
AX25_MAX_VALUES /* THIS MUST REMAIN THE LAST ENTRY OF THIS LIST */
};
@@ -229,6 +231,7 @@ typedef struct ax25_dev {
#endif
refcount_t refcount;
bool device_up;
+ struct rcu_head rcu;
} ax25_dev;
typedef struct ax25_cb {
@@ -288,9 +291,8 @@ static inline void ax25_dev_hold(ax25_dev *ax25_dev)
static inline void ax25_dev_put(ax25_dev *ax25_dev)
{
- if (refcount_dec_and_test(&ax25_dev->refcount)) {
- kfree(ax25_dev);
- }
+ if (refcount_dec_and_test(&ax25_dev->refcount))
+ kfree_rcu(ax25_dev, rcu);
}
static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
{
@@ -333,9 +335,9 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *);
extern spinlock_t ax25_dev_lock;
#if IS_ENABLED(CONFIG_AX25)
-static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
+static inline ax25_dev *ax25_dev_ax25dev(const struct net_device *dev)
{
- return dev->ax25_ptr;
+ return rcu_dereference_rtnl(dev->ax25_ptr);
}
#endif
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index c25f9f4cac80..435250c72d56 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -286,7 +286,7 @@ void bt_err_ratelimited(const char *fmt, ...);
bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
/* Connection and socket states */
-enum {
+enum bt_sock_state {
BT_CONNECTED = 1, /* Equal to TCP_ESTABLISHED to make net code happy */
BT_OPEN,
BT_BOUND,
@@ -443,6 +443,10 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status,
u16 opcode, struct sk_buff *skb);
+void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
+ hci_req_complete_t *req_complete,
+ hci_req_complete_skb_t *req_complete_skb);
+
#define HCI_REQ_START BIT(0)
#define HCI_REQ_SKB BIT(1)
@@ -545,7 +549,7 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
return ERR_PTR(-EFAULT);
}
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
return skb;
}
@@ -587,15 +591,6 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
return skb;
}
-static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
- sockptr_t src, size_t src_size)
-{
- if (dst_size > src_size)
- return -EINVAL;
-
- return copy_from_sockptr(dst, src, dst_size);
-}
-
int bt_to_errno(u16 code);
__u8 bt_status(int err);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 77a3040a3f29..40fce4193cc1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (C) 2000-2001 Qualcomm Incorporated
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -29,6 +29,7 @@
#define HCI_MAX_ACL_SIZE 1024
#define HCI_MAX_SCO_SIZE 255
#define HCI_MAX_ISO_SIZE 251
+#define HCI_MAX_ISO_BIS 31
#define HCI_MAX_EVENT_SIZE 260
#define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4)
@@ -206,14 +207,17 @@ enum {
*/
HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
- /* When this quirk is set, the controller has validated that
- * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are
- * valid. This mechanism is necessary as many controllers have
- * been seen has having trouble initiating a connectable
- * advertisement despite the state combination being reported as
- * supported.
+ /* When this quirk is set, the LE states reported through the
+ * HCI_LE_READ_SUPPORTED_STATES are invalid/broken.
+ *
+ * This mechanism is necessary as many controllers have been seen has
+ * having trouble initiating a connectable advertisement despite the
+ * state combination being reported as supported.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
*/
- HCI_QUIRK_VALID_LE_STATES,
+ HCI_QUIRK_BROKEN_LE_STATES,
/* When this quirk is set, then erroneous data reporting
* is ignored. This is mainly due to the fact that the HCI
@@ -338,6 +342,33 @@ enum {
* claim to support it.
*/
HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE,
+
+ /*
+ * When this quirk is set, the reserved bits of Primary/Secondary_PHY
+ * inside the LE Extended Advertising Report events are discarded.
+ * This is required for some Apple/Broadcom controllers which
+ * abuse these reserved bits for unrelated flags.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
+
+ /* When this quirk is set, the HCI_OP_READ_VOICE_SETTING command is
+ * skipped. This is required for a subset of the CSR controller clones
+ * which erroneously claim to support it.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_BROKEN_READ_VOICE_SETTING,
+
+ /* When this quirk is set, the HCI_OP_READ_PAGE_SCAN_TYPE command is
+ * skipped. This is required for a subset of the CSR controller clones
+ * which erroneously claim to support it.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE,
};
/* HCI device flags */
@@ -380,6 +411,7 @@ enum {
HCI_SETUP,
HCI_CONFIG,
HCI_DEBUGFS_CREATED,
+ HCI_POWERING_DOWN,
HCI_AUTO_OFF,
HCI_RFKILLED,
HCI_MGMT,
@@ -446,7 +478,6 @@ enum {
#define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
#define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */
#define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */
-#define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */
/* HCI data types */
#define HCI_COMMAND_PKT 0x01
@@ -668,7 +699,7 @@ enum {
#define HCI_ERROR_REMOTE_POWER_OFF 0x15
#define HCI_ERROR_LOCAL_HOST_TERM 0x16
#define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18
-#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e
+#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1a
#define HCI_ERROR_INVALID_LL_PARAMS 0x1e
#define HCI_ERROR_UNSPECIFIED 0x1f
#define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c
@@ -683,6 +714,7 @@ enum {
#define HCI_RSSI_INVALID 127
#define HCI_SYNC_HANDLE_INVALID 0xffff
+#define HCI_SID_INVALID 0xff
#define HCI_ROLE_MASTER 0x00
#define HCI_ROLE_SLAVE 0x01
@@ -1881,6 +1913,8 @@ struct hci_cp_le_pa_create_sync {
__u8 sync_cte_type;
} __packed;
+#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045
+
#define HCI_OP_LE_PA_TERM_SYNC 0x2046
struct hci_cp_le_pa_term_sync {
__le16 handle;
@@ -1934,7 +1968,7 @@ struct hci_cp_le_set_ext_adv_data {
__u8 operation;
__u8 frag_pref;
__u8 length;
- __u8 data[];
+ __u8 data[] __counted_by(length);
} __packed;
#define HCI_OP_LE_SET_EXT_SCAN_RSP_DATA 0x2038
@@ -1943,7 +1977,7 @@ struct hci_cp_le_set_ext_scan_rsp_data {
__u8 operation;
__u8 frag_pref;
__u8 length;
- __u8 data[];
+ __u8 data[] __counted_by(length);
} __packed;
#define HCI_OP_LE_SET_EXT_ADV_ENABLE 0x2039
@@ -1962,13 +1996,14 @@ struct hci_cp_le_set_per_adv_params {
} __packed;
#define HCI_MAX_PER_AD_LENGTH 252
+#define HCI_MAX_PER_AD_TOT_LEN 1650
#define HCI_OP_LE_SET_PER_ADV_DATA 0x203f
struct hci_cp_le_set_per_adv_data {
__u8 handle;
__u8 operation;
__u8 length;
- __u8 data[];
+ __u8 data[] __counted_by(length);
} __packed;
#define HCI_OP_LE_SET_PER_ADV_ENABLE 0x2040
@@ -2051,7 +2086,7 @@ struct hci_cp_le_set_cig_params {
__le16 c_latency;
__le16 p_latency;
__u8 num_cis;
- struct hci_cis_params cis[];
+ struct hci_cis_params cis[] __counted_by(num_cis);
} __packed;
struct hci_rp_le_set_cig_params {
@@ -2069,7 +2104,7 @@ struct hci_cis {
struct hci_cp_le_create_cis {
__u8 num_cis;
- struct hci_cis cis[];
+ struct hci_cis cis[] __counted_by(num_cis);
} __packed;
#define HCI_OP_LE_REMOVE_CIG 0x2065
@@ -2123,7 +2158,7 @@ struct hci_cp_le_big_create_sync {
__u8 mse;
__le16 timeout;
__u8 num_bis;
- __u8 bis[];
+ __u8 bis[] __counted_by(num_bis);
} __packed;
#define HCI_OP_LE_BIG_TERM_SYNC 0x206c
@@ -2722,6 +2757,10 @@ struct hci_ev_le_per_adv_report {
__u8 data[];
} __packed;
+#define LE_PA_DATA_COMPLETE 0x00
+#define LE_PA_DATA_MORE_TO_COME 0x01
+#define LE_PA_DATA_TRUNCATED 0x02
+
#define HCI_EV_LE_EXT_ADV_SET_TERM 0x12
struct hci_evt_le_ext_adv_set_term {
__u8 status;
@@ -2775,7 +2814,7 @@ struct hci_evt_le_create_big_complete {
__le16 bis_handle[];
} __packed;
-#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d
+#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d
struct hci_evt_le_big_sync_estabilished {
__u8 status;
__u8 handle;
@@ -2896,6 +2935,11 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb)
return (struct hci_sco_hdr *) skb->data;
}
+static inline struct hci_iso_hdr *hci_iso_hdr(const struct sk_buff *skb)
+{
+ return (struct hci_iso_hdr *)skb->data;
+}
+
/* Command opcode pack/unpack */
#define hci_opcode_pack(ogf, ocf) ((__u16) ((ocf & 0x03ff)|(ogf << 10)))
#define hci_opcode_ogf(op) (op >> 10)
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4fcee6b734b7..3d1d7296aed9 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,7 +1,7 @@
/*
BlueZ - Bluetooth protocol stack for Linux
Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
- Copyright 2023 NXP
+ Copyright 2023-2024 NXP
Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
@@ -29,6 +29,7 @@
#include <linux/idr.h>
#include <linux/leds.h>
#include <linux/rculist.h>
+#include <linux/srcu.h>
#include <net/bluetooth/hci.h>
#include <net/bluetooth/hci_sync.h>
@@ -91,8 +92,6 @@ struct discovery_state {
s8 rssi;
u16 uuid_count;
u8 (*uuids)[16];
- unsigned long scan_start;
- unsigned long scan_duration;
unsigned long name_resolve_timeout;
};
@@ -241,6 +240,7 @@ struct adv_info {
bool periodic;
__u8 mesh;
__u8 instance;
+ __u8 handle;
__u32 flags;
__u16 timeout;
__u16 remaining_time;
@@ -339,6 +339,7 @@ struct adv_monitor {
struct hci_dev {
struct list_head list;
+ struct srcu_struct srcu;
struct mutex lock;
struct ida unset_handle_ida;
@@ -472,7 +473,6 @@ struct hci_dev {
unsigned int iso_pkts;
unsigned long acl_last_tx;
- unsigned long sco_last_tx;
unsigned long le_last_tx;
__u8 le_tx_def_phys;
@@ -504,7 +504,6 @@ struct hci_dev {
struct work_struct tx_work;
struct delayed_work le_scan_disable;
- struct delayed_work le_scan_restart;
struct sk_buff_head rx_q;
struct sk_buff_head raw_q;
@@ -525,7 +524,6 @@ struct hci_dev {
struct discovery_state discovery;
- int discovery_old_state;
bool discovery_paused;
int advertising_old_state;
bool advertising_paused;
@@ -542,6 +540,7 @@ struct hci_dev {
struct hci_conn_hash conn_hash;
struct list_head mesh_pending;
+ struct mutex mgmt_pending_lock;
struct list_head mgmt_pending;
struct list_head reject_list;
struct list_head accept_list;
@@ -644,6 +643,7 @@ struct hci_dev {
int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type,
struct bt_codec *codec, __u8 *vnd_len,
__u8 **vnd_data);
+ u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb);
};
#define HCI_PHY_HANDLE(handle) (handle & 0xff)
@@ -671,6 +671,7 @@ struct hci_conn {
__u8 adv_instance;
__u16 handle;
__u16 sync_handle;
+ __u8 sid;
__u16 state;
__u16 mtu;
__u8 mode;
@@ -702,14 +703,20 @@ struct hci_conn {
__u16 le_supv_timeout;
__u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH];
__u8 le_adv_data_len;
- __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH];
- __u8 le_per_adv_data_len;
+ __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN];
+ __u16 le_per_adv_data_len;
+ __u16 le_per_adv_data_offset;
+ __u8 le_adv_phy;
+ __u8 le_adv_sec_phy;
__u8 le_tx_phy;
__u8 le_rx_phy;
__s8 rssi;
__s8 tx_power;
__s8 max_tx_power;
struct bt_iso_qos iso_qos;
+ __u8 num_bis;
+ __u8 bis[HCI_MAX_ISO_BIS];
+
unsigned long flags;
enum conn_reasons conn_reason;
@@ -800,6 +807,7 @@ struct hci_conn_params {
extern struct list_head hci_dev_list;
extern struct list_head hci_cb_list;
extern rwlock_t hci_dev_list_lock;
+extern struct mutex hci_cb_list_lock;
#define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags)
#define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags)
@@ -809,20 +817,20 @@ extern rwlock_t hci_dev_list_lock;
#define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags)
#define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags)
-#define hci_dev_clear_volatile_flags(hdev) \
- do { \
- hci_dev_clear_flag(hdev, HCI_LE_SCAN); \
- hci_dev_clear_flag(hdev, HCI_LE_ADV); \
- hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\
- hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \
- hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \
+#define hci_dev_clear_volatile_flags(hdev) \
+ do { \
+ hci_dev_clear_flag((hdev), HCI_LE_SCAN); \
+ hci_dev_clear_flag((hdev), HCI_LE_ADV); \
+ hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \
+ hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \
+ hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \
} while (0)
#define hci_dev_le_state_simultaneous(hdev) \
- (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \
- (hdev->le_states[4] & 0x08) && /* Central */ \
- (hdev->le_states[4] & 0x40) && /* Peripheral */ \
- (hdev->le_states[3] & 0x10)) /* Simultaneous */
+ (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \
+ ((hdev)->le_states[4] & 0x08) && /* Central */ \
+ ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \
+ ((hdev)->le_states[3] & 0x10)) /* Simultaneous */
/* ----- HCI interface to upper protocols ----- */
int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
@@ -881,8 +889,6 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev)
hdev->discovery.uuid_count = 0;
kfree(hdev->discovery.uuids);
hdev->discovery.uuids = NULL;
- hdev->discovery.scan_start = 0;
- hdev->discovery.scan_duration = 0;
}
bool hci_discovery_active(struct hci_dev *hdev);
@@ -946,8 +952,10 @@ enum {
HCI_CONN_PER_ADV,
HCI_CONN_BIG_CREATED,
HCI_CONN_CREATE_CIS,
+ HCI_CONN_CREATE_BIG_SYNC,
HCI_CONN_BIG_SYNC,
HCI_CONN_BIG_SYNC_FAILED,
+ HCI_CONN_CREATE_PA_SYNC,
HCI_CONN_PA_SYNC,
HCI_CONN_PA_SYNC_FAILED,
};
@@ -1101,6 +1109,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
}
static inline struct hci_conn *
+hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type != ISO_LINK)
+ continue;
+
+ if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags))
+ continue;
+
+ rcu_read_unlock();
+ return c;
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *
hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev,
bdaddr_t *ba,
__u8 big, __u8 bis)
@@ -1270,8 +1302,9 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
return NULL;
}
-static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *hdev,
- __u8 handle)
+static inline struct hci_conn *
+hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev,
+ __u8 handle, __u8 num_bis)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
@@ -1282,7 +1315,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *
if (c->type != ISO_LINK)
continue;
- if (handle != BT_ISO_QOS_BIG_UNSET && handle == c->iso_qos.bcast.big) {
+ if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) {
rcu_read_unlock();
return c;
}
@@ -1349,8 +1382,14 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK ||
- !test_bit(HCI_CONN_PA_SYNC, &c->flags))
+ if (c->type != ISO_LINK)
+ continue;
+
+ /* Ignore the listen hcon, we are looking
+ * for the child hcon that was created as
+ * a result of the PA sync established event.
+ */
+ if (c->state == BT_LISTEN)
continue;
if (c->sync_handle == sync_handle) {
@@ -1404,6 +1443,26 @@ static inline void hci_conn_hash_list_state(struct hci_dev *hdev,
rcu_read_unlock();
}
+static inline void hci_conn_hash_list_flag(struct hci_dev *hdev,
+ hci_conn_func_t func, __u8 type,
+ __u8 flag, void *data)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ if (!func)
+ return;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type == type && test_bit(flag, &c->flags))
+ func(c, data);
+ }
+
+ rcu_read_unlock();
+}
+
static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev)
{
struct hci_conn_hash *h = &hdev->conn_hash;
@@ -1470,13 +1529,14 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
enum conn_reasons conn_reason);
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, bool dst_resolved, u8 sec_level,
- u16 conn_timeout, u8 role);
+ u16 conn_timeout, u8 role, u8 phy, u8 sec_phy);
void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status);
struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
u8 sec_level, u8 auth_type,
- enum conn_reasons conn_reason);
+ enum conn_reasons conn_reason, u16 timeout);
struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
- __u16 setting, struct bt_codec *codec);
+ __u16 setting, struct bt_codec *codec,
+ u16 timeout);
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
@@ -1487,11 +1547,11 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos,
__u8 data_len, __u8 *data);
-int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
- __u8 sid, struct bt_iso_qos *qos);
-int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
- struct bt_iso_qos *qos,
- __u16 sync_handle, __u8 num_bis, __u8 bis[]);
+struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, __u8 sid, struct bt_iso_qos *qos);
+int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos, __u16 sync_handle,
+ __u8 num_bis, __u8 bis[]);
int hci_conn_check_link_mode(struct hci_conn *conn);
int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level);
int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
@@ -1862,6 +1922,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
((dev)->commands[20] & 0x10 && \
!test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks))
+#define read_voice_setting_capable(dev) \
+ ((dev)->commands[9] & 0x04 && \
+ !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks))
+
/* Use enhanced synchronous connection if command is supported and its quirk
* has not been set.
*/
@@ -1948,47 +2012,24 @@ struct hci_cb {
char *name;
- bool (*match) (struct hci_conn *conn);
void (*connect_cfm) (struct hci_conn *conn, __u8 status);
void (*disconn_cfm) (struct hci_conn *conn, __u8 status);
void (*security_cfm) (struct hci_conn *conn, __u8 status,
- __u8 encrypt);
+ __u8 encrypt);
void (*key_change_cfm) (struct hci_conn *conn, __u8 status);
void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role);
};
-static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list)
-{
- struct hci_cb *cb, *cpy;
-
- rcu_read_lock();
- list_for_each_entry_rcu(cb, &hci_cb_list, list) {
- if (cb->match && cb->match(conn)) {
- cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC);
- if (!cpy)
- break;
-
- *cpy = *cb;
- INIT_LIST_HEAD(&cpy->list);
- list_add_rcu(&cpy->list, list);
- }
- }
- rcu_read_unlock();
-}
-
static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->connect_cfm)
cb->connect_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->connect_cfm_cb)
conn->connect_cfm_cb(conn, status);
@@ -1996,43 +2037,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
+ struct hci_cb *cb;
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->disconn_cfm)
cb->disconn_cfm(conn, reason);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->disconn_cfm_cb)
conn->disconn_cfm_cb(conn, reason);
}
-static inline void hci_security_cfm(struct hci_conn *conn, __u8 status,
- __u8 encrypt)
-{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
- if (cb->security_cfm)
- cb->security_cfm(conn, status, encrypt);
- kfree(cb);
- }
-
- if (conn->security_cfm_cb)
- conn->security_cfm_cb(conn, status);
-}
-
static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
@@ -2040,11 +2060,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00;
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (conn->state == BT_CONFIG) {
@@ -2071,38 +2100,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
conn->sec_level = conn->pending_sec_level;
}
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->key_change_cfm)
cb->key_change_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
__u8 role)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->role_switch_cfm)
cb->role_switch_cfm(conn, status, role);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type)
@@ -2249,8 +2280,22 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
/* These LE scan and inquiry parameters were chosen according to LE General
* Discovery Procedure specification.
*/
-#define DISCOV_LE_SCAN_WIN 0x12
-#define DISCOV_LE_SCAN_INT 0x12
+#define DISCOV_LE_SCAN_WIN 0x0012 /* 11.25 msec */
+#define DISCOV_LE_SCAN_INT 0x0012 /* 11.25 msec */
+#define DISCOV_LE_SCAN_INT_FAST 0x0060 /* 60 msec */
+#define DISCOV_LE_SCAN_WIN_FAST 0x0030 /* 30 msec */
+#define DISCOV_LE_SCAN_INT_CONN 0x0060 /* 60 msec */
+#define DISCOV_LE_SCAN_WIN_CONN 0x0060 /* 60 msec */
+#define DISCOV_LE_SCAN_INT_SLOW1 0x0800 /* 1.28 sec */
+#define DISCOV_LE_SCAN_WIN_SLOW1 0x0012 /* 11.25 msec */
+#define DISCOV_LE_SCAN_INT_SLOW2 0x1000 /* 2.56 sec */
+#define DISCOV_LE_SCAN_WIN_SLOW2 0x0024 /* 22.5 msec */
+#define DISCOV_CODED_SCAN_INT_FAST 0x0120 /* 180 msec */
+#define DISCOV_CODED_SCAN_WIN_FAST 0x0090 /* 90 msec */
+#define DISCOV_CODED_SCAN_INT_SLOW1 0x1800 /* 3.84 sec */
+#define DISCOV_CODED_SCAN_WIN_SLOW1 0x0036 /* 33.75 msec */
+#define DISCOV_CODED_SCAN_INT_SLOW2 0x3000 /* 7.68 sec */
+#define DISCOV_CODED_SCAN_WIN_SLOW2 0x006c /* 67.5 msec */
#define DISCOV_LE_TIMEOUT 10240 /* msec */
#define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */
#define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04
@@ -2337,7 +2382,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
u8 instance);
void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
u8 instance);
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle);
int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
bdaddr_t *bdaddr, u8 addr_type);
diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h
index 9949870f7d78..13e8cd4414a1 100644
--- a/include/net/bluetooth/hci_sock.h
+++ b/include/net/bluetooth/hci_sock.h
@@ -144,7 +144,7 @@ struct hci_dev_req {
struct hci_dev_list_req {
__u16 dev_num;
- struct hci_dev_req dev_req[]; /* hci_dev_req structures */
+ struct hci_dev_req dev_req[] __counted_by(dev_num);
};
struct hci_conn_list_req {
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 3cb2d10cac93..dbabc17b30cd 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -8,6 +8,23 @@
#define UINT_PTR(_handle) ((void *)((uintptr_t)_handle))
#define PTR_UINT(_ptr) ((uintptr_t)((void *)_ptr))
+#define HCI_REQ_DONE 0
+#define HCI_REQ_PEND 1
+#define HCI_REQ_CANCELED 2
+
+#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock)
+#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
+
+struct hci_request {
+ struct hci_dev *hdev;
+ struct sk_buff_head cmd_q;
+
+ /* If something goes wrong when building the HCI request, the error
+ * value is stored in this field.
+ */
+ int err;
+};
+
typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data);
typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data,
int err);
@@ -20,6 +37,10 @@ struct hci_cmd_sync_work_entry {
};
struct adv_info;
+
+struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, struct sock *sk);
+
/* Function with sync suffix shall not be called with hdev->lock held as they
* wait the command to complete and in the meantime an event could be received
* which could attempt to acquire hdev->lock causing a deadlock.
@@ -135,6 +156,8 @@ int hci_update_discoverable(struct hci_dev *hdev);
int hci_update_connectable_sync(struct hci_dev *hdev);
+int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp);
+
int hci_start_discovery_sync(struct hci_dev *hdev);
int hci_stop_discovery_sync(struct hci_dev *hdev);
@@ -142,6 +165,7 @@ int hci_suspend_sync(struct hci_dev *hdev);
int hci_resume_sync(struct hci_dev *hdev);
struct hci_conn;
+struct hci_conn_params;
int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason);
@@ -160,3 +184,8 @@ int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn);
int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn);
int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ struct hci_conn_params *params);
+
+int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn);
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d2a1154121d0..9189354c568f 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -27,7 +27,7 @@
#ifndef __L2CAP_H
#define __L2CAP_H
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/atomic.h>
/* L2CAP defaults */
@@ -463,18 +463,24 @@ struct l2cap_le_credits {
#define L2CAP_ECRED_MAX_CID 5
struct l2cap_ecred_conn_req {
- __le16 psm;
- __le16 mtu;
- __le16 mps;
- __le16 credits;
+ /* New members must be added within the struct_group() macro below. */
+ __struct_group(l2cap_ecred_conn_req_hdr, hdr, __packed,
+ __le16 psm;
+ __le16 mtu;
+ __le16 mps;
+ __le16 credits;
+ );
__le16 scid[];
} __packed;
struct l2cap_ecred_conn_rsp {
- __le16 mtu;
- __le16 mps;
- __le16 credits;
- __le16 result;
+ /* New members must be added within the struct_group() macro below. */
+ struct_group_tagged(l2cap_ecred_conn_rsp_hdr, hdr,
+ __le16 mtu;
+ __le16 mps;
+ __le16 credits;
+ __le16 result;
+ );
__le16 dcid[];
};
@@ -662,7 +668,7 @@ struct l2cap_conn {
struct l2cap_chan *smp;
struct list_head chan_l;
- struct mutex chan_lock;
+ struct mutex lock;
struct kref ref;
struct list_head users;
};
@@ -947,7 +953,7 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid);
struct l2cap_chan *l2cap_chan_create(void);
void l2cap_chan_close(struct l2cap_chan *chan, int reason);
int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
- bdaddr_t *dst, u8 dst_type);
+ bdaddr_t *dst, u8 dst_type, u16 timeout);
int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu);
int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len);
void l2cap_chan_busy(struct l2cap_chan *chan, int busy);
@@ -962,12 +968,9 @@ void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func,
void *data);
void l2cap_chan_del(struct l2cap_chan *chan, int err);
void l2cap_send_conn_req(struct l2cap_chan *chan);
-void l2cap_move_start(struct l2cap_chan *chan);
-void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan,
- u8 status);
-void __l2cap_physical_cfm(struct l2cap_chan *chan, int result);
struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn);
+struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *conn);
void l2cap_conn_put(struct l2cap_conn *conn);
int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user);
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 99d26879b02a..c05882476900 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -355,7 +355,7 @@ struct rfcomm_dev_info {
struct rfcomm_dev_list_req {
u16 dev_num;
- struct rfcomm_dev_info dev_info[];
+ struct rfcomm_dev_info dev_info[] __counted_by(dev_num);
};
int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index c5e57c6bd873..2053cd8e788a 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -54,6 +54,8 @@ typedef enum {
AD_MUX_DETACHED, /* mux machine */
AD_MUX_WAITING, /* mux machine */
AD_MUX_ATTACHED, /* mux machine */
+ AD_MUX_COLLECTING, /* mux machine */
+ AD_MUX_DISTRIBUTING, /* mux machine */
AD_MUX_COLLECTING_DISTRIBUTING /* mux machine */
} mux_states_t;
@@ -229,7 +231,10 @@ typedef struct port {
mux_states_t sm_mux_state; /* state machine mux state */
u16 sm_mux_timer_counter; /* state machine mux timer counter */
tx_states_t sm_tx_state; /* state machine tx state */
- u16 sm_tx_timer_counter; /* state machine tx timer counter(allways on - enter to transmit state 3 time per second) */
+ u16 sm_tx_timer_counter; /* state machine tx timer counter
+ * (always on - enter to transmit
+ * state 3 time per second)
+ */
u16 sm_churn_actor_timer_counter;
u16 sm_churn_partner_timer_counter;
u32 churn_actor_count;
diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
index 9dc082b2d543..e5945427f38d 100644
--- a/include/net/bond_alb.h
+++ b/include/net/bond_alb.h
@@ -53,7 +53,7 @@ struct slave;
struct tlb_client_info {
- struct slave *tx_slave; /* A pointer to slave used for transmiting
+ struct slave *tx_slave; /* A pointer to slave used for transmitting
* packets to a Client that the Hash function
* gave this entry index.
*/
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index f631d9f09941..18687ccf0638 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -76,6 +76,7 @@ enum {
BOND_OPT_MISSED_MAX,
BOND_OPT_NS_TARGETS,
BOND_OPT_PRIO,
+ BOND_OPT_COUPLED_CONTROL,
BOND_OPT_LAST
};
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 94594026a5c5..8bb5f016969f 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -148,6 +148,7 @@ struct bond_params {
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ns_targets[BOND_MAX_NS_TARGETS];
#endif
+ int coupled_control;
/* 2 bytes of padding : see ether_addr_equal_64bits() */
u8 ad_actor_system[ETH_ALEN + 2];
@@ -167,6 +168,7 @@ struct slave {
u8 backup:1, /* indicates backup slave. Value corresponds with
BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
inactive:1, /* indicates inactive slave */
+ rx_disabled:1, /* indicates whether slave's Rx is disabled */
should_notify:1, /* indicates whether the state changed */
should_notify_link:1; /* indicates whether the link changed */
u8 duplex;
@@ -568,6 +570,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave,
bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);
if (!slave->bond->params.all_slaves_active)
slave->inactive = 1;
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD)
+ slave->rx_disabled = 1;
+}
+
+static inline void bond_set_slave_tx_disabled_flags(struct slave *slave,
+ bool notify)
+{
+ bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);
}
static inline void bond_set_slave_active_flags(struct slave *slave,
@@ -575,6 +585,14 @@ static inline void bond_set_slave_active_flags(struct slave *slave,
{
bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify);
slave->inactive = 0;
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD)
+ slave->rx_disabled = 0;
+}
+
+static inline void bond_set_slave_rx_enabled_flags(struct slave *slave,
+ bool notify)
+{
+ slave->rx_disabled = 0;
}
static inline bool bond_is_slave_inactive(struct slave *slave)
@@ -582,6 +600,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
return slave->inactive;
}
+static inline bool bond_is_slave_rx_disabled(struct slave *slave)
+{
+ return slave->rx_disabled;
+}
+
static inline void bond_propose_link_state(struct slave *slave, int state)
{
slave->link_new_state = state;
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 9f2ce4d05c26..f03040baaefd 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -48,6 +48,10 @@ void napi_busy_loop(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+void napi_busy_loop_rcu(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+
#else /* CONFIG_NET_RX_BUSY_POLL */
static inline unsigned long net_busy_loop_on(void)
{
@@ -127,7 +131,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
#endif
}
-/* used in the protocol hanlder to propagate the napi_id to the socket */
+/* used in the protocol handler to propagate the napi_id to the socket */
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
index 51f7bb42a936..053e7c6a6a66 100644
--- a/include/net/caif/caif_layer.h
+++ b/include/net/caif/caif_layer.h
@@ -11,9 +11,7 @@
struct cflayer;
struct cfpkt;
-struct cfpktq;
struct caif_payload_info;
-struct caif_packet_funcs;
#define CAIF_LAYER_NAME_SZ 16
@@ -22,7 +20,7 @@ struct caif_packet_funcs;
* @assert: expression to evaluate.
*
* This function will print a error message and a do WARN_ON if the
- * assertion failes. Normally this will do a stack up at the current location.
+ * assertion fails. Normally this will do a stack up at the current location.
*/
#define caif_assert(assert) \
do { \
@@ -118,7 +116,7 @@ enum caif_direction {
* @dn: Pointer down to the layer below.
* @node: List node used when layer participate in a list.
* @receive: Packet receive function.
- * @transmit: Packet transmit funciton.
+ * @transmit: Packet transmit function.
* @ctrlcmd: Used for control signalling upwards in the stack.
* @modemcmd: Used for control signaling downwards in the stack.
* @id: The identity of this layer
diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h
index 44d914a50369..acf664227d96 100644
--- a/include/net/caif/cfpkt.h
+++ b/include/net/caif/cfpkt.h
@@ -18,7 +18,7 @@ struct cfpkt *cfpkt_create(u16 len);
/*
* Destroy a CAIF Packet.
- * pkt Packet to be destoyed.
+ * pkt Packet to be destroyed.
*/
void cfpkt_destroy(struct cfpkt *pkt);
diff --git a/include/net/calipso.h b/include/net/calipso.h
index f8667a3fda9e..76b9e08c10c2 100644
--- a/include/net/calipso.h
+++ b/include/net/calipso.h
@@ -25,7 +25,7 @@
#include <net/netlabel.h>
#include <net/request_sock.h>
#include <linux/refcount.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* known doi values */
#define CALIPSO_DOI_UNKNOWN 0x00000000
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 802ea3080d0b..bb1862536f9c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021, 2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ethtool.h>
@@ -52,7 +52,7 @@
* such wiphy can have zero, one, or many virtual interfaces associated with
* it, which need to be identified as such by pointing the network interface's
* @ieee80211_ptr pointer to a &struct wireless_dev which further describes
- * the wireless part of the interface, normally this struct is embedded in the
+ * the wireless part of the interface. Normally this struct is embedded in the
* network interface's private data area. Drivers can optionally allow creating
* or destroying virtual interfaces on the fly, but without at least one or the
* ability to create some the wireless device isn't useful.
@@ -76,6 +76,8 @@ struct wiphy;
* @IEEE80211_CHAN_DISABLED: This channel is disabled.
* @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes
* sending probe requests or beaconing.
+ * @IEEE80211_CHAN_PSD: Power spectral density (in dBm) is set for this
+ * channel.
* @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
* @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel
* is not permitted.
@@ -115,29 +117,47 @@ struct wiphy;
* This may be due to the driver or due to regulatory bandwidth
* restrictions.
* @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel.
+ * @IEEE80211_CHAN_DFS_CONCURRENT: See %NL80211_RRF_DFS_CONCURRENT
+ * @IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT: Client connection with VLP AP
+ * not permitted using this channel
+ * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP
+ * not permitted using this channel
+ * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor
+ * mode even in the presence of other (regulatory) restrictions,
+ * even if it is otherwise disabled.
+ * @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation
+ * with very low power (VLP), even if otherwise set to NO_IR.
+ * @IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY: Allow activity on a 20 MHz channel,
+ * even if otherwise set to NO_IR.
*/
enum ieee80211_channel_flags {
- IEEE80211_CHAN_DISABLED = 1<<0,
- IEEE80211_CHAN_NO_IR = 1<<1,
- /* hole at 1<<2 */
- IEEE80211_CHAN_RADAR = 1<<3,
- IEEE80211_CHAN_NO_HT40PLUS = 1<<4,
- IEEE80211_CHAN_NO_HT40MINUS = 1<<5,
- IEEE80211_CHAN_NO_OFDM = 1<<6,
- IEEE80211_CHAN_NO_80MHZ = 1<<7,
- IEEE80211_CHAN_NO_160MHZ = 1<<8,
- IEEE80211_CHAN_INDOOR_ONLY = 1<<9,
- IEEE80211_CHAN_IR_CONCURRENT = 1<<10,
- IEEE80211_CHAN_NO_20MHZ = 1<<11,
- IEEE80211_CHAN_NO_10MHZ = 1<<12,
- IEEE80211_CHAN_NO_HE = 1<<13,
- IEEE80211_CHAN_1MHZ = 1<<14,
- IEEE80211_CHAN_2MHZ = 1<<15,
- IEEE80211_CHAN_4MHZ = 1<<16,
- IEEE80211_CHAN_8MHZ = 1<<17,
- IEEE80211_CHAN_16MHZ = 1<<18,
- IEEE80211_CHAN_NO_320MHZ = 1<<19,
- IEEE80211_CHAN_NO_EHT = 1<<20,
+ IEEE80211_CHAN_DISABLED = BIT(0),
+ IEEE80211_CHAN_NO_IR = BIT(1),
+ IEEE80211_CHAN_PSD = BIT(2),
+ IEEE80211_CHAN_RADAR = BIT(3),
+ IEEE80211_CHAN_NO_HT40PLUS = BIT(4),
+ IEEE80211_CHAN_NO_HT40MINUS = BIT(5),
+ IEEE80211_CHAN_NO_OFDM = BIT(6),
+ IEEE80211_CHAN_NO_80MHZ = BIT(7),
+ IEEE80211_CHAN_NO_160MHZ = BIT(8),
+ IEEE80211_CHAN_INDOOR_ONLY = BIT(9),
+ IEEE80211_CHAN_IR_CONCURRENT = BIT(10),
+ IEEE80211_CHAN_NO_20MHZ = BIT(11),
+ IEEE80211_CHAN_NO_10MHZ = BIT(12),
+ IEEE80211_CHAN_NO_HE = BIT(13),
+ IEEE80211_CHAN_1MHZ = BIT(14),
+ IEEE80211_CHAN_2MHZ = BIT(15),
+ IEEE80211_CHAN_4MHZ = BIT(16),
+ IEEE80211_CHAN_8MHZ = BIT(17),
+ IEEE80211_CHAN_16MHZ = BIT(18),
+ IEEE80211_CHAN_NO_320MHZ = BIT(19),
+ IEEE80211_CHAN_NO_EHT = BIT(20),
+ IEEE80211_CHAN_DFS_CONCURRENT = BIT(21),
+ IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22),
+ IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23),
+ IEEE80211_CHAN_CAN_MONITOR = BIT(24),
+ IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25),
+ IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY = BIT(26),
};
#define IEEE80211_CHAN_NO_HT40 \
@@ -171,6 +191,7 @@ enum ieee80211_channel_flags {
* on this channel.
* @dfs_state_entered: timestamp (jiffies) when the dfs state was entered.
* @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels.
+ * @psd: power spectral density (in dBm)
*/
struct ieee80211_channel {
enum nl80211_band band;
@@ -187,6 +208,7 @@ struct ieee80211_channel {
enum nl80211_dfs_state dfs_state;
unsigned long dfs_state_entered;
unsigned int dfs_cac_ms;
+ s8 psd;
};
/**
@@ -213,13 +235,13 @@ struct ieee80211_channel {
* @IEEE80211_RATE_SUPPORTS_10MHZ: Rate can be used in 10 MHz mode
*/
enum ieee80211_rate_flags {
- IEEE80211_RATE_SHORT_PREAMBLE = 1<<0,
- IEEE80211_RATE_MANDATORY_A = 1<<1,
- IEEE80211_RATE_MANDATORY_B = 1<<2,
- IEEE80211_RATE_MANDATORY_G = 1<<3,
- IEEE80211_RATE_ERP_G = 1<<4,
- IEEE80211_RATE_SUPPORTS_5MHZ = 1<<5,
- IEEE80211_RATE_SUPPORTS_10MHZ = 1<<6,
+ IEEE80211_RATE_SHORT_PREAMBLE = BIT(0),
+ IEEE80211_RATE_MANDATORY_A = BIT(1),
+ IEEE80211_RATE_MANDATORY_B = BIT(2),
+ IEEE80211_RATE_MANDATORY_G = BIT(3),
+ IEEE80211_RATE_ERP_G = BIT(4),
+ IEEE80211_RATE_SUPPORTS_5MHZ = BIT(5),
+ IEEE80211_RATE_SUPPORTS_10MHZ = BIT(6),
};
/**
@@ -410,6 +432,19 @@ struct ieee80211_sta_eht_cap {
u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN];
};
+/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */
+#ifdef __CHECKER__
+/*
+ * This is used to mark the sband->iftype_data pointer which is supposed
+ * to be an array with special access semantics (per iftype), but a lot
+ * of code got it wrong in the past, so with this marking sparse will be
+ * noisy when the pointer is used directly.
+ */
+# define __iftd __attribute__((noderef, address_space(__iftype_data)))
+#else
+# define __iftd
+#endif /* __CHECKER__ */
+
/**
* struct ieee80211_sband_iftype_data - sband data per interface type
*
@@ -543,10 +578,48 @@ struct ieee80211_supported_band {
struct ieee80211_sta_s1g_cap s1g_cap;
struct ieee80211_edmg edmg_cap;
u16 n_iftype_data;
- const struct ieee80211_sband_iftype_data *iftype_data;
+ const struct ieee80211_sband_iftype_data __iftd *iftype_data;
};
/**
+ * _ieee80211_set_sband_iftype_data - set sband iftype data array
+ * @sband: the sband to initialize
+ * @iftd: the iftype data array pointer
+ * @n_iftd: the length of the iftype data array
+ *
+ * Set the sband iftype data array; use this where the length cannot
+ * be derived from the ARRAY_SIZE() of the argument, but prefer
+ * ieee80211_set_sband_iftype_data() where it can be used.
+ */
+static inline void
+_ieee80211_set_sband_iftype_data(struct ieee80211_supported_band *sband,
+ const struct ieee80211_sband_iftype_data *iftd,
+ u16 n_iftd)
+{
+ sband->iftype_data = (const void __iftd __force *)iftd;
+ sband->n_iftype_data = n_iftd;
+}
+
+/**
+ * ieee80211_set_sband_iftype_data - set sband iftype data array
+ * @sband: the sband to initialize
+ * @iftd: the iftype data array
+ */
+#define ieee80211_set_sband_iftype_data(sband, iftd) \
+ _ieee80211_set_sband_iftype_data(sband, iftd, ARRAY_SIZE(iftd))
+
+/**
+ * for_each_sband_iftype_data - iterate sband iftype data entries
+ * @sband: the sband whose iftype_data array to iterate
+ * @i: iterator counter
+ * @iftd: iftype data pointer to set
+ */
+#define for_each_sband_iftype_data(sband, i, iftd) \
+ for (i = 0, iftd = (const void __force *)&(sband)->iftype_data[i]; \
+ i < (sband)->n_iftype_data; \
+ i++, iftd = (const void __force *)&(sband)->iftype_data[i])
+
+/**
* ieee80211_get_sband_iftype_data - return sband data for a given iftype
* @sband: the sband to search for the STA on
* @iftype: enum nl80211_iftype
@@ -557,6 +630,7 @@ static inline const struct ieee80211_sband_iftype_data *
ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
u8 iftype)
{
+ const struct ieee80211_sband_iftype_data *data;
int i;
if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
@@ -565,10 +639,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
if (iftype == NL80211_IFTYPE_AP_VLAN)
iftype = NL80211_IFTYPE_AP;
- for (i = 0; i < sband->n_iftype_data; i++) {
- const struct ieee80211_sband_iftype_data *data =
- &sband->iftype_data[i];
-
+ for_each_sband_iftype_data(sband, i, data) {
if (data->types_mask & BIT(iftype))
return data;
}
@@ -747,6 +818,9 @@ struct key_params {
* chan will define the primary channel and all other
* parameters are ignored.
* @freq1_offset: offset from @center_freq1, in KHz
+ * @punctured: mask of the punctured 20 MHz subchannels, with
+ * bits turned on being disabled (punctured); numbered
+ * from lower to higher frequency (like in the spec)
*/
struct cfg80211_chan_def {
struct ieee80211_channel *chan;
@@ -755,6 +829,7 @@ struct cfg80211_chan_def {
u32 center_freq2;
struct ieee80211_edmg edmg;
u16 freq1_offset;
+ u16 punctured;
};
/*
@@ -895,7 +970,8 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1,
chandef1->width == chandef2->width &&
chandef1->center_freq1 == chandef2->center_freq1 &&
chandef1->freq1_offset == chandef2->freq1_offset &&
- chandef1->center_freq2 == chandef2->center_freq2);
+ chandef1->center_freq2 == chandef2->center_freq2 &&
+ chandef1->punctured == chandef2->punctured);
}
/**
@@ -924,6 +1000,15 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
const struct cfg80211_chan_def *chandef2);
/**
+ * nl80211_chan_width_to_mhz - get the channel width in MHz
+ * @chan_width: the channel width from &enum nl80211_chan_width
+ *
+ * Return: channel width in MHz if the chan_width from &enum nl80211_chan_width
+ * is valid. -1 otherwise.
+ */
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
+
+/**
* cfg80211_chandef_valid - check if a channel definition is valid
* @chandef: the channel definition to check
* Return: %true if the channel definition is valid. %false otherwise.
@@ -954,6 +1039,44 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
enum nl80211_iftype iftype);
/**
+ * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable and we
+ * can/need start CAC on such channel
+ * @wiphy: the wiphy to validate against
+ * @chandef: the channel definition to check
+ *
+ * Return: true if all channels available and at least
+ * one channel requires CAC (NL80211_DFS_USABLE)
+ */
+bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef);
+
+/**
+ * cfg80211_chandef_dfs_cac_time - get the DFS CAC time (in ms) for given
+ * channel definition
+ * @wiphy: the wiphy to validate against
+ * @chandef: the channel definition to check
+ *
+ * Returns: DFS CAC time (in ms) which applies for this channel definition
+ */
+unsigned int
+cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef);
+
+/**
+ * cfg80211_chandef_primary - calculate primary 40/80/160 MHz freq
+ * @chandef: chandef to calculate for
+ * @primary_chan_width: primary channel width to calculate center for
+ * @punctured: punctured sub-channel bitmap, will be recalculated
+ * according to the new bandwidth, can be %NULL
+ *
+ * Returns: the primary 40/80/160 MHz channel center frequency, or -1
+ * for errors, updating the punctured bitmap
+ */
+int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef,
+ enum nl80211_chan_width primary_chan_width,
+ u16 *punctured);
+
+/**
* nl80211_send_chandef - sends the channel definition.
* @msg: the msg to send channel definition
* @chandef: the channel definition to check
@@ -1032,6 +1155,8 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef)
* @band_mask: which bands to check on
* @prohibited_flags: which channels to not consider usable,
* %IEEE80211_CHAN_DISABLED is always taken into account
+ *
+ * Return: %true if usable channels found, %false otherwise
*/
bool cfg80211_any_usable_channels(struct wiphy *wiphy,
unsigned long band_mask,
@@ -1289,6 +1414,7 @@ struct cfg80211_acl_data {
* struct cfg80211_fils_discovery - FILS discovery parameters from
* IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
*
+ * @update: Set to true if the feature configuration should be updated.
* @min_interval: Minimum packet interval in TUs (0 - 10000)
* @max_interval: Maximum packet interval in TUs (0 - 10000)
* @tmpl_len: Template length
@@ -1296,6 +1422,7 @@ struct cfg80211_acl_data {
* frame headers.
*/
struct cfg80211_fils_discovery {
+ bool update;
u32 min_interval;
u32 max_interval;
size_t tmpl_len;
@@ -1306,6 +1433,7 @@ struct cfg80211_fils_discovery {
* struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe
* response parameters in 6GHz.
*
+ * @update: Set to true if the feature configuration should be updated.
* @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned
* in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive
* scanning
@@ -1313,6 +1441,7 @@ struct cfg80211_fils_discovery {
* @tmpl: Template data for probe response
*/
struct cfg80211_unsol_bcast_probe_resp {
+ bool update;
u32 interval;
size_t tmpl_len;
const u8 *tmpl;
@@ -1359,9 +1488,6 @@ struct cfg80211_unsol_bcast_probe_resp {
* @fils_discovery: FILS discovery transmission parameters
* @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
* @mbssid_config: AP settings for multiple bssid
- * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
- * a 20 MHz channel, lowest bit corresponding to the lowest channel.
- * Bit set to 1 indicates that the channel is punctured.
*/
struct cfg80211_ap_settings {
struct cfg80211_chan_def chandef;
@@ -1396,7 +1522,22 @@ struct cfg80211_ap_settings {
struct cfg80211_fils_discovery fils_discovery;
struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
struct cfg80211_mbssid_config mbssid_config;
- u16 punct_bitmap;
+};
+
+
+/**
+ * struct cfg80211_ap_update - AP configuration update
+ *
+ * Subset of &struct cfg80211_ap_settings, for updating a running AP.
+ *
+ * @beacon: beacon data
+ * @fils_discovery: FILS discovery transmission parameters
+ * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
+ */
+struct cfg80211_ap_update {
+ struct cfg80211_beacon_data beacon;
+ struct cfg80211_fils_discovery fils_discovery;
+ struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
};
/**
@@ -1414,9 +1555,8 @@ struct cfg80211_ap_settings {
* @radar_required: whether radar detection is required on the new channel
* @block_tx: whether transmissions should be blocked while changing
* @count: number of beacons until switch
- * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
- * a 20 MHz channel, lowest bit corresponding to the lowest channel.
- * Bit set to 1 indicates that the channel is punctured.
+ * @link_id: defines the link on which channel switch is expected during
+ * MLO. 0 in case of non-MLO.
*/
struct cfg80211_csa_settings {
struct cfg80211_chan_def chandef;
@@ -1429,7 +1569,7 @@ struct cfg80211_csa_settings {
bool radar_required;
bool block_tx;
u8 count;
- u16 punct_bitmap;
+ u8 link_id;
};
/**
@@ -1443,6 +1583,8 @@ struct cfg80211_csa_settings {
* @beacon_next: beacon data to be used after the color change
* @count: number of beacons until the color change
* @color: the color used after the change
+ * @link_id: defines the link on which color change is expected during MLO.
+ * 0 in case of non-MLO.
*/
struct cfg80211_color_change_settings {
struct cfg80211_beacon_data beacon_color_change;
@@ -1451,6 +1593,7 @@ struct cfg80211_color_change_settings {
struct cfg80211_beacon_data beacon_next;
u8 count;
u8 color;
+ u8 link_id;
};
/**
@@ -1458,6 +1601,7 @@ struct cfg80211_color_change_settings {
*
* Used to pass interface combination parameters
*
+ * @radio_idx: wiphy radio index or -1 for global
* @num_different_channels: the number of different channels we want
* to use for verification
* @radar_detect: a bitmap where each bit corresponds to a channel
@@ -1471,6 +1615,7 @@ struct cfg80211_color_change_settings {
* the verification
*/
struct iface_combination_params {
+ int radio_idx;
int num_different_channels;
u8 radar_detect;
int iftype_num[NUM_NL80211_IFTYPES];
@@ -1568,6 +1713,21 @@ struct link_station_del_parameters {
};
/**
+ * struct cfg80211_ttlm_params: TID to link mapping parameters
+ *
+ * Used for setting a TID to link mapping.
+ *
+ * @dlink: Downlink TID to link mapping, as defined in section 9.4.2.314
+ * (TID-To-Link Mapping element) in Draft P802.11be_D4.0.
+ * @ulink: Uplink TID to link mapping, as defined in section 9.4.2.314
+ * (TID-To-Link Mapping element) in Draft P802.11be_D4.0.
+ */
+struct cfg80211_ttlm_params {
+ u16 dlink[8];
+ u16 ulink[8];
+};
+
+/**
* struct station_parameters - station parameters
*
* Used to change and create a new station.
@@ -1637,11 +1797,15 @@ struct station_parameters {
* @subtype: Management frame subtype to use for indicating removal
* (10 = Disassociation, 12 = Deauthentication)
* @reason_code: Reason code for the Disassociation/Deauthentication frame
+ * @link_id: Link ID indicating a link that stations to be flushed must be
+ * using; valid only for MLO, but can also be -1 for MLO to really
+ * remove all stations.
*/
struct station_del_parameters {
const u8 *mac;
u8 subtype;
u16 reason_code;
+ int link_id;
};
/**
@@ -1682,9 +1846,11 @@ enum cfg80211_station_type {
*
* Utility function for the @change_station driver method. Call this function
* with the appropriate station type looking up the station (and checking that
- * it exists). It will verify whether the station change is acceptable, and if
- * not will return an error code. Note that it may modify the parameters for
- * backward compatibility reasons, so don't use them before calling this.
+ * it exists). It will verify whether the station change is acceptable.
+ *
+ * Return: 0 if the change is acceptable, otherwise an error code. Note that
+ * it may modify the parameters for backward compatibility reasons, so don't
+ * use them before calling this.
*/
int cfg80211_check_station_change(struct wiphy *wiphy,
struct station_parameters *params,
@@ -1799,9 +1965,9 @@ struct rate_info {
* @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled
*/
enum bss_param_flags {
- BSS_PARAM_FLAGS_CTS_PROT = 1<<0,
- BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1,
- BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2,
+ BSS_PARAM_FLAGS_CTS_PROT = BIT(0),
+ BSS_PARAM_FLAGS_SHORT_PREAMBLE = BIT(1),
+ BSS_PARAM_FLAGS_SHORT_SLOT_TIME = BIT(2),
};
/**
@@ -2042,7 +2208,7 @@ struct cfg80211_sar_sub_specs {
struct cfg80211_sar_specs {
enum nl80211_sar_type type;
u32 num_sub_specs;
- struct cfg80211_sar_sub_specs sub_specs[];
+ struct cfg80211_sar_sub_specs sub_specs[] __counted_by(num_sub_specs);
};
@@ -2078,7 +2244,7 @@ struct cfg80211_sar_capa {
* @mac_addr: the mac address of the station of interest
* @sinfo: pointer to the structure to fill with the information
*
- * Returns 0 on success and sinfo is filled with the available information
+ * Return: 0 on success and sinfo is filled with the available information
* otherwise returns a negative error code and the content of sinfo has to be
* considered undefined.
*/
@@ -2108,13 +2274,13 @@ static inline int cfg80211_get_station(struct net_device *dev,
* @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address
*/
enum monitor_flags {
- MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID,
- MONITOR_FLAG_FCSFAIL = 1<<NL80211_MNTR_FLAG_FCSFAIL,
- MONITOR_FLAG_PLCPFAIL = 1<<NL80211_MNTR_FLAG_PLCPFAIL,
- MONITOR_FLAG_CONTROL = 1<<NL80211_MNTR_FLAG_CONTROL,
- MONITOR_FLAG_OTHER_BSS = 1<<NL80211_MNTR_FLAG_OTHER_BSS,
- MONITOR_FLAG_COOK_FRAMES = 1<<NL80211_MNTR_FLAG_COOK_FRAMES,
- MONITOR_FLAG_ACTIVE = 1<<NL80211_MNTR_FLAG_ACTIVE,
+ MONITOR_FLAG_CHANGED = BIT(__NL80211_MNTR_FLAG_INVALID),
+ MONITOR_FLAG_FCSFAIL = BIT(NL80211_MNTR_FLAG_FCSFAIL),
+ MONITOR_FLAG_PLCPFAIL = BIT(NL80211_MNTR_FLAG_PLCPFAIL),
+ MONITOR_FLAG_CONTROL = BIT(NL80211_MNTR_FLAG_CONTROL),
+ MONITOR_FLAG_OTHER_BSS = BIT(NL80211_MNTR_FLAG_OTHER_BSS),
+ MONITOR_FLAG_COOK_FRAMES = BIT(NL80211_MNTR_FLAG_COOK_FRAMES),
+ MONITOR_FLAG_ACTIVE = BIT(NL80211_MNTR_FLAG_ACTIVE),
};
/**
@@ -2346,7 +2512,7 @@ struct mesh_config {
* @user_mpm: userspace handles all MPM functions
* @dtim_period: DTIM period to use
* @beacon_interval: beacon interval to use
- * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a]
+ * @mcast_rate: multicast rate for Mesh Node [6Mbps is the default for 802.11a]
* @basic_rates: basic rates to use when creating the mesh
* @beacon_rate: bitrate to be used for beacons
* @userspace_handles_dfs: whether user space controls DFS operation, i.e.
@@ -2463,7 +2629,7 @@ struct cfg80211_scan_info {
* @short_ssid: short ssid to scan for
* @bssid: bssid to scan for
* @channel_idx: idx of the channel in the channel array in the scan request
- * which the above info relvant to
+ * which the above info is relevant to
* @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU
* @short_ssid_valid: @short_ssid is valid and can be used
* @psc_no_listen: when set, and the channel is a PSC channel, no need to wait
@@ -2487,7 +2653,6 @@ struct cfg80211_scan_6ghz_params {
* @n_ssids: number of SSIDs
* @channels: channels to scan on.
* @n_channels: total number of channels to scan
- * @scan_width: channel width for scanning
* @ie: optional information element(s) to add into Probe Request or %NULL
* @ie_len: length of ie in octets
* @duration: how long to listen on each channel, in TUs. If
@@ -2512,12 +2677,13 @@ struct cfg80211_scan_6ghz_params {
* @n_6ghz_params: number of 6 GHz params
* @scan_6ghz_params: 6 GHz params
* @bssid: BSSID to scan for (most commonly, the wildcard BSSID)
+ * @tsf_report_link_id: for MLO, indicates the link ID of the BSS that should be
+ * used for TSF reporting. Can be set to -1 to indicate no preference.
*/
struct cfg80211_scan_request {
struct cfg80211_ssid *ssids;
int n_ssids;
u32 n_channels;
- enum nl80211_bss_scan_width scan_width;
const u8 *ie;
size_t ie_len;
u16 duration;
@@ -2541,9 +2707,10 @@ struct cfg80211_scan_request {
bool scan_6ghz;
u32 n_6ghz_params;
struct cfg80211_scan_6ghz_params *scan_6ghz_params;
+ s8 tsf_report_link_id;
/* keep last */
- struct ieee80211_channel *channels[] __counted_by(n_channels);
+ struct ieee80211_channel *channels[];
};
static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
@@ -2565,19 +2732,11 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
* @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match
* or no match (RSSI only)
* @rssi_thold: don't report scan results below this threshold (in s32 dBm)
- * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied
- * for filtering out scan results received. Drivers advertize this support
- * of band specific rssi based filtering through the feature capability
- * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band
- * specific rssi thresholds take precedence over rssi_thold, if specified.
- * If not specified for any band, it will be assigned with rssi_thold of
- * corresponding matchset.
*/
struct cfg80211_match_set {
struct cfg80211_ssid ssid;
u8 bssid[ETH_ALEN];
s32 rssi_thold;
- s32 per_band_rssi_thold[NUM_NL80211_BANDS];
};
/**
@@ -2612,14 +2771,13 @@ struct cfg80211_bss_select_adjust {
* @ssids: SSIDs to scan for (passed in the probe_reqs in active scans)
* @n_ssids: number of SSIDs
* @n_channels: total number of channels to scan
- * @scan_width: channel width for scanning
* @ie: optional information element(s) to add into Probe Request or %NULL
* @ie_len: length of ie in octets
* @flags: control flags from &enum nl80211_scan_flags
* @match_sets: sets of parameters to be matched for a scan result
* entry to be considered valid and to be passed to the host
* (others are filtered out).
- * If ommited, all results are passed.
+ * If omitted, all results are passed.
* @n_match_sets: number of match sets
* @report_results: indicates that results were reported for this request
* @wiphy: the wiphy this was for
@@ -2653,14 +2811,13 @@ struct cfg80211_bss_select_adjust {
* to the specified band while deciding whether a better BSS is reported
* using @relative_rssi. If delta is a negative number, the BSSs that
* belong to the specified band will be penalized by delta dB in relative
- * comparisions.
+ * comparisons.
*/
struct cfg80211_sched_scan_request {
u64 reqid;
struct cfg80211_ssid *ssids;
int n_ssids;
u32 n_channels;
- enum nl80211_bss_scan_width scan_width;
const u8 *ie;
size_t ie_len;
u32 flags;
@@ -2689,7 +2846,7 @@ struct cfg80211_sched_scan_request {
struct list_head list;
/* keep last */
- struct ieee80211_channel *channels[];
+ struct ieee80211_channel *channels[] __counted_by(n_channels);
};
/**
@@ -2708,7 +2865,6 @@ enum cfg80211_signal_type {
/**
* struct cfg80211_inform_bss - BSS inform data
* @chan: channel the frame was received on
- * @scan_width: scan width that was used
* @signal: signal strength value, according to the wiphy's
* signal type
* @boottime_ns: timestamp (CLOCK_BOOTTIME) when the information was
@@ -2724,11 +2880,17 @@ enum cfg80211_signal_type {
* the BSS that requested the scan in which the beacon/probe was received.
* @chains: bitmask for filled values in @chain_signal.
* @chain_signal: per-chain signal strength of last received BSS in dBm.
+ * @restrict_use: restrict usage, if not set, assume @use_for is
+ * %NL80211_BSS_USE_FOR_NORMAL.
+ * @use_for: bitmap of possible usage for this BSS, see
+ * &enum nl80211_bss_use_for
+ * @cannot_use_reasons: the reasons (bitmap) for not being able to connect,
+ * if @restrict_use is set and @use_for is zero (empty); may be 0 for
+ * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons
* @drv_data: Data to be passed through to @inform_bss
*/
struct cfg80211_inform_bss {
struct ieee80211_channel *chan;
- enum nl80211_bss_scan_width scan_width;
s32 signal;
u64 boottime_ns;
u64 parent_tsf;
@@ -2736,6 +2898,9 @@ struct cfg80211_inform_bss {
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+ u8 restrict_use:1, use_for:7;
+ u8 cannot_use_reasons;
+
void *drv_data;
};
@@ -2762,7 +2927,6 @@ struct cfg80211_bss_ies {
* for use in scan results and similar.
*
* @channel: channel this BSS is on
- * @scan_width: width of the control channel
* @bssid: BSSID of the BSS
* @beacon_interval: the beacon interval as from the frame
* @capability: the capability field in host byte order
@@ -2775,6 +2939,8 @@ struct cfg80211_bss_ies {
* own the beacon_ies, but they're just pointers to the ones from the
* @hidden_beacon_bss struct)
* @proberesp_ies: the information elements from the last Probe Response frame
+ * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame,
+ * cannot rely on it having valid data
* @hidden_beacon_bss: in case this BSS struct represents a probe response from
* a BSS that hides the SSID in its beacon, this points to the BSS struct
* that holds the beacon data. @beacon_ies is still valid, of course, and
@@ -2788,11 +2954,15 @@ struct cfg80211_bss_ies {
* @chain_signal: per-chain signal strength of last received BSS in dBm.
* @bssid_index: index in the multiple BSS set
* @max_bssid_indicator: max number of members in the BSS set
+ * @use_for: bitmap of possible usage for this BSS, see
+ * &enum nl80211_bss_use_for
+ * @cannot_use_reasons: the reasons (bitmap) for not being able to connect,
+ * if @restrict_use is set and @use_for is zero (empty); may be 0 for
+ * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons
* @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
*/
struct cfg80211_bss {
struct ieee80211_channel *channel;
- enum nl80211_bss_scan_width scan_width;
const struct cfg80211_bss_ies __rcu *ies;
const struct cfg80211_bss_ies __rcu *beacon_ies;
@@ -2811,9 +2981,14 @@ struct cfg80211_bss {
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+ u8 proberesp_ecsa_stuck:1;
+
u8 bssid_index;
u8 max_bssid_indicator;
+ u8 use_for;
+ u8 cannot_use_reasons;
+
u8 priv[] __aligned(sizeof(void *));
};
@@ -2891,12 +3066,15 @@ struct cfg80211_auth_request {
* @elems_len: length of the elements
* @disabled: If set this link should be included during association etc. but it
* should not be used until enabled by the AP MLD.
+ * @error: per-link error code, must be <= 0. If there is an error, then the
+ * operation as a whole must fail.
*/
struct cfg80211_assoc_link {
struct cfg80211_bss *bss;
const u8 *elems;
size_t elems_len;
bool disabled;
+ int error;
};
/**
@@ -2914,6 +3092,7 @@ struct cfg80211_assoc_link {
* @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links.
* Drivers shall disable MLO features for the current association if this
* flag is not set.
+ * @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any)
*/
enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HT = BIT(0),
@@ -2923,6 +3102,7 @@ enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HE = BIT(4),
ASSOC_REQ_DISABLE_EHT = BIT(5),
CONNECT_REQ_MLO_SUPPORT = BIT(6),
+ ASSOC_REQ_SPP_AMSDU = BIT(7),
};
/**
@@ -3088,8 +3268,8 @@ struct cfg80211_ibss_params {
*
* @behaviour: requested BSS selection behaviour.
* @param: parameters for requestion behaviour.
- * @band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF.
- * @adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST.
+ * @param.band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF.
+ * @param.adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST.
*/
struct cfg80211_bss_selection {
enum nl80211_bss_select_attr behaviour;
@@ -3227,15 +3407,15 @@ enum cfg80211_connect_params_changed {
* @WIPHY_PARAM_TXQ_QUANTUM: TXQ scheduler quantum
*/
enum wiphy_params_flags {
- WIPHY_PARAM_RETRY_SHORT = 1 << 0,
- WIPHY_PARAM_RETRY_LONG = 1 << 1,
- WIPHY_PARAM_FRAG_THRESHOLD = 1 << 2,
- WIPHY_PARAM_RTS_THRESHOLD = 1 << 3,
- WIPHY_PARAM_COVERAGE_CLASS = 1 << 4,
- WIPHY_PARAM_DYN_ACK = 1 << 5,
- WIPHY_PARAM_TXQ_LIMIT = 1 << 6,
- WIPHY_PARAM_TXQ_MEMORY_LIMIT = 1 << 7,
- WIPHY_PARAM_TXQ_QUANTUM = 1 << 8,
+ WIPHY_PARAM_RETRY_SHORT = BIT(0),
+ WIPHY_PARAM_RETRY_LONG = BIT(1),
+ WIPHY_PARAM_FRAG_THRESHOLD = BIT(2),
+ WIPHY_PARAM_RTS_THRESHOLD = BIT(3),
+ WIPHY_PARAM_COVERAGE_CLASS = BIT(4),
+ WIPHY_PARAM_DYN_ACK = BIT(5),
+ WIPHY_PARAM_TXQ_LIMIT = BIT(6),
+ WIPHY_PARAM_TXQ_MEMORY_LIMIT = BIT(7),
+ WIPHY_PARAM_TXQ_QUANTUM = BIT(8),
};
#define IEEE80211_DEFAULT_AIRTIME_WEIGHT 256
@@ -3394,8 +3574,8 @@ struct cfg80211_coalesce_rules {
* @n_rules: number of rules
*/
struct cfg80211_coalesce {
- struct cfg80211_coalesce_rules *rules;
int n_rules;
+ struct cfg80211_coalesce_rules rules[] __counted_by(n_rules);
};
/**
@@ -3410,7 +3590,7 @@ struct cfg80211_coalesce {
struct cfg80211_wowlan_nd_match {
struct cfg80211_ssid ssid;
int n_channels;
- u32 channels[];
+ u32 channels[] __counted_by(n_channels);
};
/**
@@ -3424,7 +3604,7 @@ struct cfg80211_wowlan_nd_match {
*/
struct cfg80211_wowlan_nd_info {
int n_matches;
- struct cfg80211_wowlan_nd_match *matches[];
+ struct cfg80211_wowlan_nd_match *matches[] __counted_by(n_matches);
};
/**
@@ -3447,12 +3627,15 @@ struct cfg80211_wowlan_nd_info {
* @tcp_connlost: TCP connection lost or failed to establish
* @tcp_nomoretokens: TCP data ran out of tokens
* @net_detect: if not %NULL, woke up because of net detect
+ * @unprot_deauth_disassoc: woke up due to unprotected deauth or
+ * disassoc frame (in MFP).
*/
struct cfg80211_wowlan_wakeup {
bool disconnect, magic_pkt, gtk_rekey_failure,
eap_identity_req, four_way_handshake,
rfkill_release, packet_80211,
- tcp_match, tcp_connlost, tcp_nomoretokens;
+ tcp_match, tcp_connlost, tcp_nomoretokens,
+ unprot_deauth_disassoc;
s32 pattern_idx;
u32 packet_present_len, packet_len;
const void *packet;
@@ -3495,7 +3678,7 @@ struct cfg80211_update_ft_ies_params {
* This structure provides information needed to transmit a mgmt frame
*
* @chan: channel to use
- * @offchan: indicates wether off channel operation is required
+ * @offchan: indicates whether off channel operation is required
* @wait: duration for ROC
* @buf: buffer to transmit
* @len: buffer length
@@ -3613,7 +3796,7 @@ struct cfg80211_nan_func_filter {
* @publish_bcast: if true, the solicited publish should be broadcasted
* @subscribe_active: if true, the subscribe is active
* @followup_id: the instance ID for follow up
- * @followup_reqid: the requestor instance ID for follow up
+ * @followup_reqid: the requester instance ID for follow up
* @followup_dest: MAC address of the recipient of the follow up
* @ttl: time to live counter in DW.
* @serv_spec_info: Service Specific Info
@@ -4401,6 +4584,9 @@ struct mgmt_frame_regs {
* @del_link_station: Remove a link of a station.
*
* @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
+ * @set_ttlm: set the TID to link mapping.
+ * @get_radio_mask: get bitmask of radios in use.
+ * (invoked with the wiphy mutex held)
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4450,7 +4636,7 @@ struct cfg80211_ops {
int (*start_ap)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ap_settings *settings);
int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev,
- struct cfg80211_beacon_data *info);
+ struct cfg80211_ap_update *info);
int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev,
unsigned int link_id);
@@ -4654,9 +4840,9 @@ struct cfg80211_ops {
int (*start_radar_detection)(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms);
+ u32 cac_time_ms, int link_id);
void (*end_cac)(struct wiphy *wiphy,
- struct net_device *dev);
+ struct net_device *dev, unsigned int link_id);
int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_update_ft_ies_params *ftie);
int (*crit_proto_start)(struct wiphy *wiphy,
@@ -4760,6 +4946,9 @@ struct cfg80211_ops {
struct link_station_del_parameters *params);
int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_set_hw_timestamp *hwts);
+ int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_ttlm_params *params);
+ u32 (*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev);
};
/*
@@ -4771,7 +4960,7 @@ struct cfg80211_ops {
* enum wiphy_flags - wiphy capability flags
*
* @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split
- * into two, first for legacy bands and second for UHB.
+ * into two, first for legacy bands and second for 6 GHz.
* @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this
* wiphy at all
* @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled
@@ -4816,6 +5005,10 @@ struct cfg80211_ops {
* @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys.
* @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for
* NL80211_REGDOM_SET_BY_DRIVER.
+ * @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver
+ * set this flag to update channels on beacon hints.
+ * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link
+ * of an NSTR mobile AP MLD.
* @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device
*/
enum wiphy_flags {
@@ -4831,7 +5024,7 @@ enum wiphy_flags {
WIPHY_FLAG_DISABLE_WEXT = BIT(9),
WIPHY_FLAG_MESH_AUTH = BIT(10),
WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11),
- /* use hole at 12 */
+ WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12),
WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13),
WIPHY_FLAG_AP_UAPSD = BIT(14),
WIPHY_FLAG_SUPPORTS_TDLS = BIT(15),
@@ -4844,6 +5037,7 @@ enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22),
WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23),
WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER = BIT(24),
+ WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON = BIT(25),
};
/**
@@ -4860,7 +5054,9 @@ struct ieee80211_iface_limit {
* struct ieee80211_iface_combination - possible interface combination
*
* With this structure the driver can describe which interface
- * combinations it supports concurrently.
+ * combinations it supports concurrently. When set in a struct wiphy_radio,
+ * the combinations refer to combinations of interfaces currently active on
+ * that radio.
*
* Examples:
*
@@ -5158,6 +5354,8 @@ struct wiphy_iftype_ext_capab {
* cfg80211_get_iftype_ext_capa - lookup interface type extended capability
* @wiphy: the wiphy to look up from
* @type: the interface type to look up
+ *
+ * Return: The extended capability for the given interface @type, may be %NULL
*/
const struct wiphy_iftype_ext_capab *
cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type);
@@ -5218,6 +5416,38 @@ struct wiphy_iftype_akm_suites {
int n_akm_suites;
};
+/**
+ * struct wiphy_radio_freq_range - wiphy frequency range
+ * @start_freq: start range edge frequency (kHz)
+ * @end_freq: end range edge frequency (kHz)
+ */
+struct wiphy_radio_freq_range {
+ u32 start_freq;
+ u32 end_freq;
+};
+
+
+/**
+ * struct wiphy_radio - physical radio of a wiphy
+ * This structure describes a physical radio belonging to a wiphy.
+ * It is used to describe concurrent-channel capabilities. Only one channel
+ * can be active on the radio described by struct wiphy_radio.
+ *
+ * @freq_range: frequency range that the radio can operate on.
+ * @n_freq_range: number of elements in @freq_range
+ *
+ * @iface_combinations: Valid interface combinations array, should not
+ * list single interface types.
+ * @n_iface_combinations: number of entries in @iface_combinations array.
+ */
+struct wiphy_radio {
+ const struct wiphy_radio_freq_range *freq_range;
+ int n_freq_range;
+
+ const struct ieee80211_iface_combination *iface_combinations;
+ int n_iface_combinations;
+};
+
#define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff
/**
@@ -5436,6 +5666,9 @@ struct wiphy_iftype_akm_suites {
* A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver
* supports enabling HW timestamping for all peers (i.e. no need to
* specify a mac address).
+ *
+ * @radio: radios belonging to this wiphy
+ * @n_radio: number of radios
*/
struct wiphy {
struct mutex mtx;
@@ -5586,6 +5819,9 @@ struct wiphy {
u16 hw_timestamp_max_peers;
+ int n_radio;
+ const struct wiphy_radio *radio;
+
char priv[] __aligned(NETDEV_ALIGN);
};
@@ -5728,6 +5964,10 @@ int wiphy_register(struct wiphy *wiphy);
/**
* get_wiphy_regdom - get custom regdomain for the given wiphy
* @wiphy: the wiphy to get the regdomain from
+ *
+ * Context: Requires any of RTNL, wiphy mutex or RCU protection.
+ *
+ * Return: pointer to the regulatory domain associated with the wiphy
*/
const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy);
@@ -5884,7 +6124,7 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy,
/**
* wiphy_delayed_work_flush - flush previously queued delayed work
* @wiphy: the wiphy, for debug purposes
- * @work: the work to flush
+ * @dwork: the delayed work to flush
*
* Flush the work (i.e. run it if pending). This must be called
* under the wiphy mutex acquired by wiphy_lock().
@@ -5893,6 +6133,65 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
struct wiphy_delayed_work *dwork);
/**
+ * wiphy_delayed_work_pending - Find out whether a wiphy delayable
+ * work item is currently pending.
+ *
+ * @wiphy: the wiphy, for debug purposes
+ * @dwork: the delayed work in question
+ *
+ * Return: true if timer is pending, false otherwise
+ *
+ * How wiphy_delayed_work_queue() works is by setting a timer which
+ * when it expires calls wiphy_work_queue() to queue the wiphy work.
+ * Because wiphy_delayed_work_queue() uses mod_timer(), if it is
+ * called twice and the second call happens before the first call
+ * deadline, the work will rescheduled for the second deadline and
+ * won't run before that.
+ *
+ * wiphy_delayed_work_pending() can be used to detect if calling
+ * wiphy_work_delayed_work_queue() would start a new work schedule
+ * or delayed a previous one. As seen below it cannot be used to
+ * detect precisely if the work has finished to execute nor if it
+ * is currently executing.
+ *
+ * CPU0 CPU1
+ * wiphy_delayed_work_queue(wk)
+ * mod_timer(wk->timer)
+ * wiphy_delayed_work_pending(wk) -> true
+ *
+ * [...]
+ * expire_timers(wk->timer)
+ * detach_timer(wk->timer)
+ * wiphy_delayed_work_pending(wk) -> false
+ * wk->timer->function() |
+ * wiphy_work_queue(wk) | delayed work pending
+ * list_add_tail() | returns false but
+ * queue_work(cfg80211_wiphy_work) | wk->func() has not
+ * | been run yet
+ * [...] |
+ * cfg80211_wiphy_work() |
+ * wk->func() V
+ *
+ */
+bool wiphy_delayed_work_pending(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork);
+
+/**
+ * enum ieee80211_ap_reg_power - regulatory power for an Access Point
+ *
+ * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode
+ * @IEEE80211_REG_LPI_AP: Indoor Access Point
+ * @IEEE80211_REG_SP_AP: Standard power Access Point
+ * @IEEE80211_REG_VLP_AP: Very low power Access Point
+ */
+enum ieee80211_ap_reg_power {
+ IEEE80211_REG_UNSET_AP,
+ IEEE80211_REG_LPI_AP,
+ IEEE80211_REG_SP_AP,
+ IEEE80211_REG_VLP_AP,
+};
+
+/**
* struct wireless_dev - wireless device state
*
* For netdevs, this structure must be allocated by the driver
@@ -5920,7 +6219,6 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
* wireless device if it has no netdev
* @u: union containing data specific to @iftype
* @connected: indicates if connected or not (STA mode)
- * @bssid: (private) Used by the internal configuration code
* @wext: (private) Used by the internal wireless extensions compat code
* @wext.ibss: (private) IBSS data part of wext handling
* @wext.connect: (private) connection handling data
@@ -5940,16 +6238,9 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
* @mgmt_registrations: list of registrations for management frames
* @mgmt_registrations_need_update: mgmt registrations were updated,
* need to propagate the update to the driver
- * @mtx: mutex used to lock data in this struct, may be used by drivers
- * and some API functions require it held
- * @beacon_interval: beacon interval used on this device for transmitting
- * beacons, 0 when not valid
* @address: The address for this device, valid only if @netdev is %NULL
* @is_running: true if this is a non-netdev device that has been started, e.g.
* the P2P Device.
- * @cac_started: true if DFS channel availability check has been started
- * @cac_start_time: timestamp (jiffies) when the dfs state was entered.
- * @cac_time_ms: CAC time in ms
* @ps: powersave mode is enabled
* @ps_timeout: dynamic powersave timeout
* @ap_unexpected_nlportid: (private) netlink port ID of application
@@ -5973,6 +6264,11 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
* unprotected beacon report
* @links: array of %IEEE80211_MLD_MAX_NUM_LINKS elements containing @addr
* @ap and @client for each link
+ * @links.cac_started: true if DFS channel availability check has been
+ * started
+ * @links.cac_start_time: timestamp (jiffies) when the dfs state was
+ * entered.
+ * @links.cac_time_ms: CAC time in ms
* @valid_links: bitmap describing what elements of @links are valid
*/
struct wireless_dev {
@@ -5988,8 +6284,6 @@ struct wireless_dev {
struct list_head mgmt_registrations;
u8 mgmt_registrations_need_update:1;
- struct mutex mtx;
-
bool use_4addr, is_running, registered, registering;
u8 address[ETH_ALEN] __aligned(sizeof(u16));
@@ -6016,11 +6310,6 @@ struct wireless_dev {
u32 owner_nlportid;
bool nl_owner_dead;
- /* FIXME: need to rework radar detection for MLO */
- bool cac_started;
- unsigned long cac_start_time;
- unsigned int cac_time_ms;
-
#ifdef CONFIG_CFG80211_WEXT
/* wext data */
struct {
@@ -6056,7 +6345,7 @@ struct wireless_dev {
int beacon_interval;
struct cfg80211_chan_def preset_chandef;
struct cfg80211_chan_def chandef;
- u8 id[IEEE80211_MAX_SSID_LEN];
+ u8 id[IEEE80211_MAX_MESH_ID_LEN];
u8 id_len, id_up_len;
} mesh;
struct {
@@ -6087,6 +6376,10 @@ struct wireless_dev {
struct cfg80211_internal_bss *current_bss;
} client;
};
+
+ bool cac_started;
+ unsigned long cac_start_time;
+ unsigned int cac_time_ms;
} links[IEEE80211_MLD_MAX_NUM_LINKS];
u16 valid_links;
};
@@ -6252,6 +6545,8 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq)
*
* The Preferred Scanning Channels (PSC) are defined in
* Draft IEEE P802.11ax/D5.0, 26.17.2.3.3
+ *
+ * Return: %true if channel is a PSC, %false otherwise
*/
static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
{
@@ -6262,6 +6557,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
}
/**
+ * cfg80211_radio_chandef_valid - Check if the radio supports the chandef
+ *
+ * @radio: wiphy radio
+ * @chandef: chandef for current channel
+ *
+ * Return: whether or not the given chandef is valid for the given radio
+ */
+bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio,
+ const struct cfg80211_chan_def *chandef);
+
+/**
* ieee80211_get_response_rate - get basic rate for a given rate
*
* @sband: the band to look for rates in
@@ -6280,13 +6586,11 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
/**
* ieee80211_mandatory_rates - get mandatory rates for a given band
* @sband: the band to look for rates in
- * @scan_width: width of the control channel
*
- * This function returns a bitmap of the mandatory rates for the given
- * band, bits are set according to the rate position in the bitrates array.
+ * Return: a bitmap of the mandatory rates for the given band, bits
+ * are set according to the rate position in the bitrates array.
*/
-u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
- enum nl80211_bss_scan_width scan_width);
+u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband);
/*
* Radiotap parsing functions -- for controlled injection support
@@ -6498,6 +6802,8 @@ bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto);
* header to the ethernet header (if present).
*
* @skb: The 802.3 frame with embedded mesh header
+ *
+ * Return: 0 on success. Non-zero on error.
*/
int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb);
@@ -6627,7 +6933,7 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
* @ies: data consisting of IEs
* @len: length of data
*
- * Return: %NULL if the etended element could not be found or if
+ * Return: %NULL if the extended element could not be found or if
* the element is invalid (claims to be longer than the given
* data) or if the byte array doesn't match; otherwise return the
* requested element struct.
@@ -6706,13 +7012,45 @@ cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
}
/**
+ * enum cfg80211_rnr_iter_ret - reduced neighbor report iteration state
+ * @RNR_ITER_CONTINUE: continue iterating with the next entry
+ * @RNR_ITER_BREAK: break iteration and return success
+ * @RNR_ITER_ERROR: break iteration and return error
+ */
+enum cfg80211_rnr_iter_ret {
+ RNR_ITER_CONTINUE,
+ RNR_ITER_BREAK,
+ RNR_ITER_ERROR,
+};
+
+/**
+ * cfg80211_iter_rnr - iterate reduced neighbor report entries
+ * @elems: the frame elements to iterate RNR elements and then
+ * their entries in
+ * @elems_len: length of the elements
+ * @iter: iteration function, see also &enum cfg80211_rnr_iter_ret
+ * for the return value
+ * @iter_data: additional data passed to the iteration function
+ * Return: %true on success (after successfully iterating all entries
+ * or if the iteration function returned %RNR_ITER_BREAK),
+ * %false on error (iteration function returned %RNR_ITER_ERROR
+ * or elements were malformed.)
+ */
+bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len,
+ enum cfg80211_rnr_iter_ret
+ (*iter)(void *data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len),
+ void *iter_data);
+
+/**
* cfg80211_defragment_element - Defrag the given element data into a buffer
*
* @elem: the element to defragment
* @ies: elements where @elem is contained
* @ieslen: length of @ies
- * @data: buffer to store element data
- * @data_len: length of @data
+ * @data: buffer to store element data, or %NULL to just determine size
+ * @data_len: length of @data, or 0
* @frag_id: the element ID of fragments
*
* Return: length of @data, or -EINVAL on error
@@ -6774,7 +7112,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2);
/**
* regulatory_set_wiphy_regd - set regdom info for self managed drivers
* @wiphy: the wireless device we want to process the regulatory domain on
- * @rd: the regulatory domain informatoin to use for this wiphy
+ * @rd: the regulatory domain information to use for this wiphy
*
* Set the regulatory domain information for self-managed wiphys, only they
* may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more
@@ -6844,6 +7182,8 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy,
*
* You can use this to map the regulatory request initiator enum to a
* proper string representation.
+ *
+ * Return: pointer to string representation of the initiator
*/
const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
@@ -6852,6 +7192,8 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
* @wiphy: wiphy for which pre-CAC capability is checked.
*
* Pre-CAC is allowed only in some regdomains (notable ETSI).
+ *
+ * Return: %true if allowed, %false otherwise
*/
bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
@@ -6865,7 +7207,7 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
* Regulatory self-managed driver can use it to proactively
*
* @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried.
- * @freq: the freqency(in MHz) to be queried.
+ * @freq: the frequency (in MHz) to be queried.
* @rule: pointer to store the wmm rule from the regulatory db.
*
* Self-managed wireless drivers can use this function to query
@@ -6948,22 +7290,6 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
gfp_t gfp);
static inline struct cfg80211_bss * __must_check
-cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
- struct ieee80211_channel *rx_channel,
- enum nl80211_bss_scan_width scan_width,
- struct ieee80211_mgmt *mgmt, size_t len,
- s32 signal, gfp_t gfp)
-{
- struct cfg80211_inform_bss data = {
- .chan = rx_channel,
- .scan_width = scan_width,
- .signal = signal,
- };
-
- return cfg80211_inform_bss_frame_data(wiphy, &data, mgmt, len, gfp);
-}
-
-static inline struct cfg80211_bss * __must_check
cfg80211_inform_bss_frame(struct wiphy *wiphy,
struct ieee80211_channel *rx_channel,
struct ieee80211_mgmt *mgmt, size_t len,
@@ -6971,7 +7297,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
{
struct cfg80211_inform_bss data = {
.chan = rx_channel,
- .scan_width = NL80211_BSS_CHAN_WIDTH_20,
.signal = signal,
};
@@ -7003,6 +7328,8 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid,
* cfg80211_is_element_inherited - returns if element ID should be inherited
* @element: element to check
* @non_inherit_element: non inheritance element
+ *
+ * Return: %true if should be inherited, %false otherwise
*/
bool cfg80211_is_element_inherited(const struct element *element,
const struct element *non_inherit_element);
@@ -7015,6 +7342,8 @@ bool cfg80211_is_element_inherited(const struct element *element,
* @sub_elem: current MBSSID subelement (profile)
* @merged_ie: location of the merged profile
* @max_copy_len: max merged profile length
+ *
+ * Return: the number of bytes merged
*/
size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
const struct element *mbssid_elem,
@@ -7027,11 +7356,13 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
* from a beacon or probe response
* @CFG80211_BSS_FTYPE_BEACON: data comes from a beacon
* @CFG80211_BSS_FTYPE_PRESP: data comes from a probe response
+ * @CFG80211_BSS_FTYPE_S1G_BEACON: data comes from an S1G beacon
*/
enum cfg80211_bss_frame_type {
CFG80211_BSS_FTYPE_UNKNOWN,
CFG80211_BSS_FTYPE_BEACON,
CFG80211_BSS_FTYPE_PRESP,
+ CFG80211_BSS_FTYPE_S1G_BEACON,
};
/**
@@ -7040,12 +7371,29 @@ enum cfg80211_bss_frame_type {
* @ielen: length of IEs
* @band: enum nl80211_band of the channel
*
- * Returns the channel number, or -1 if none could be determined.
+ * Return: the channel number, or -1 if none could be determined.
*/
int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
enum nl80211_band band);
/**
+ * cfg80211_ssid_eq - compare two SSIDs
+ * @a: first SSID
+ * @b: second SSID
+ *
+ * Return: %true if SSIDs are equal, %false otherwise.
+ */
+static inline bool
+cfg80211_ssid_eq(struct cfg80211_ssid *a, struct cfg80211_ssid *b)
+{
+ if (WARN_ON(!a || !b))
+ return false;
+ if (a->ssid_len != b->ssid_len)
+ return false;
+ return memcmp(a->ssid, b->ssid, a->ssid_len) ? false : true;
+}
+
+/**
* cfg80211_inform_bss_data - inform cfg80211 of a new BSS
*
* @wiphy: the wiphy reporting the BSS
@@ -7074,26 +7422,6 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
gfp_t gfp);
static inline struct cfg80211_bss * __must_check
-cfg80211_inform_bss_width(struct wiphy *wiphy,
- struct ieee80211_channel *rx_channel,
- enum nl80211_bss_scan_width scan_width,
- enum cfg80211_bss_frame_type ftype,
- const u8 *bssid, u64 tsf, u16 capability,
- u16 beacon_interval, const u8 *ie, size_t ielen,
- s32 signal, gfp_t gfp)
-{
- struct cfg80211_inform_bss data = {
- .chan = rx_channel,
- .scan_width = scan_width,
- .signal = signal,
- };
-
- return cfg80211_inform_bss_data(wiphy, &data, ftype, bssid, tsf,
- capability, beacon_interval, ie, ielen,
- gfp);
-}
-
-static inline struct cfg80211_bss * __must_check
cfg80211_inform_bss(struct wiphy *wiphy,
struct ieee80211_channel *rx_channel,
enum cfg80211_bss_frame_type ftype,
@@ -7103,7 +7431,6 @@ cfg80211_inform_bss(struct wiphy *wiphy,
{
struct cfg80211_inform_bss data = {
.chan = rx_channel,
- .scan_width = NL80211_BSS_CHAN_WIDTH_20,
.signal = signal,
};
@@ -7113,6 +7440,27 @@ cfg80211_inform_bss(struct wiphy *wiphy,
}
/**
+ * __cfg80211_get_bss - get a BSS reference
+ * @wiphy: the wiphy this BSS struct belongs to
+ * @channel: the channel to search on (or %NULL)
+ * @bssid: the desired BSSID (or %NULL)
+ * @ssid: the desired SSID (or %NULL)
+ * @ssid_len: length of the SSID (or 0)
+ * @bss_type: type of BSS, see &enum ieee80211_bss_type
+ * @privacy: privacy filter, see &enum ieee80211_privacy
+ * @use_for: indicates which use is intended
+ *
+ * Return: Reference-counted BSS on success. %NULL on error.
+ */
+struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy,
+ struct ieee80211_channel *channel,
+ const u8 *bssid,
+ const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy,
+ u32 use_for);
+
+/**
* cfg80211_get_bss - get a BSS reference
* @wiphy: the wiphy this BSS struct belongs to
* @channel: the channel to search on (or %NULL)
@@ -7121,13 +7469,22 @@ cfg80211_inform_bss(struct wiphy *wiphy,
* @ssid_len: length of the SSID (or 0)
* @bss_type: type of BSS, see &enum ieee80211_bss_type
* @privacy: privacy filter, see &enum ieee80211_privacy
+ *
+ * This version implies regular usage, %NL80211_BSS_USE_FOR_NORMAL.
+ *
+ * Return: Reference-counted BSS on success. %NULL on error.
*/
-struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
- const u8 *bssid,
- const u8 *ssid, size_t ssid_len,
- enum ieee80211_bss_type bss_type,
- enum ieee80211_privacy privacy);
+static inline struct cfg80211_bss *
+cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel,
+ const u8 *bssid, const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy)
+{
+ return __cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len,
+ bss_type, privacy,
+ NL80211_BSS_USE_FOR_NORMAL);
+}
+
static inline struct cfg80211_bss *
cfg80211_get_ibss(struct wiphy *wiphy,
struct ieee80211_channel *channel,
@@ -7188,19 +7545,6 @@ void cfg80211_bss_iter(struct wiphy *wiphy,
void *data),
void *iter_data);
-static inline enum nl80211_bss_scan_width
-cfg80211_chandef_to_scan_width(const struct cfg80211_chan_def *chandef)
-{
- switch (chandef->width) {
- case NL80211_CHAN_WIDTH_5:
- return NL80211_BSS_CHAN_WIDTH_5;
- case NL80211_CHAN_WIDTH_10:
- return NL80211_BSS_CHAN_WIDTH_10;
- default:
- return NL80211_BSS_CHAN_WIDTH_20;
- }
-}
-
/**
* cfg80211_rx_mlme_mgmt - notification of processed MLME management frame
* @dev: network device
@@ -7233,9 +7577,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len);
void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
/**
- * struct cfg80211_rx_assoc_resp - association response data
- * @bss: the BSS that association was requested with, ownership of the pointer
- * moves to cfg80211 in the call to cfg80211_rx_assoc_resp()
+ * struct cfg80211_rx_assoc_resp_data - association response data
* @buf: (Re)Association Response frame (header + body)
* @len: length of the frame data
* @uapsd_queues: bitmap of queues configured for uapsd. Same format
@@ -7245,10 +7587,12 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
* @ap_mld_addr: AP MLD address (in case of MLO)
* @links: per-link information indexed by link ID, use links[0] for
* non-MLO connections
+ * @links.bss: the BSS that association was requested with, ownership of the
+ * pointer moves to cfg80211 in the call to cfg80211_rx_assoc_resp()
* @links.status: Set this (along with a BSS pointer) for links that
* were rejected by the AP.
*/
-struct cfg80211_rx_assoc_resp {
+struct cfg80211_rx_assoc_resp_data {
const u8 *buf;
size_t len;
const u8 *req_ies;
@@ -7265,7 +7609,7 @@ struct cfg80211_rx_assoc_resp {
/**
* cfg80211_rx_assoc_resp - notification of processed association response
* @dev: network device
- * @data: association response data, &struct cfg80211_rx_assoc_resp
+ * @data: association response data, &struct cfg80211_rx_assoc_resp_data
*
* After being asked to associate via cfg80211_ops::assoc() the driver must
* call either this function or cfg80211_auth_timeout().
@@ -7273,7 +7617,7 @@ struct cfg80211_rx_assoc_resp {
* This function may sleep. The caller must hold the corresponding wdev's mutex.
*/
void cfg80211_rx_assoc_resp(struct net_device *dev,
- struct cfg80211_rx_assoc_resp *data);
+ const struct cfg80211_rx_assoc_resp_data *data);
/**
* struct cfg80211_assoc_failure - association failure data
@@ -7392,7 +7736,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev,
* RFkill integration in cfg80211 is almost invisible to drivers,
* as cfg80211 automatically registers an rfkill instance for each
* wireless device it knows about. Soft kill is also translated
- * into disconnecting and turning all interfaces off, drivers are
+ * into disconnecting and turning all interfaces off. Drivers are
* expected to turn off the device when all interfaces are down.
*
* However, devices may have a hard RFkill line, in which case they
@@ -7440,7 +7784,7 @@ static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
* the configuration mechanism.
*
* A driver supporting vendor commands must register them as an array
- * in struct wiphy, with handlers for each one, each command has an
+ * in struct wiphy, with handlers for each one. Each command has an
* OUI and sub command ID to identify it.
*
* Note that this feature should not be (ab)used to implement protocol
@@ -7513,8 +7857,9 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb);
* cfg80211_vendor_cmd_get_sender - get the current sender netlink ID
* @wiphy: the wiphy
*
- * Return the current netlink port ID in a vendor command handler.
- * Valid to call only there.
+ * Return: the current netlink port ID in a vendor command handler.
+ *
+ * Context: May only be called from a vendor command handler
*/
unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy);
@@ -7604,7 +7949,7 @@ static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp)
* interact with driver-specific tools to aid, for instance,
* factory programming.
*
- * This chapter describes how drivers interact with it, for more
+ * This chapter describes how drivers interact with it. For more
* information see the nl80211 book's chapter on it.
*/
@@ -7992,7 +8337,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
* cfg80211_port_authorized - notify cfg80211 of successful security association
*
* @dev: network device
- * @bssid: the BSSID of the AP
+ * @peer_addr: BSSID of the AP/P2P GO in case of STA/GC or STA/GC MAC address
+ * in case of AP/P2P GO
* @td_bitmap: transition disable policy
* @td_bitmap_len: Length of transition disable policy
* @gfp: allocation flags
@@ -8003,8 +8349,11 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
* should be preceded with a call to cfg80211_connect_result(),
* cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to
* indicate the 802.11 association.
+ * This function can also be called by AP/P2P GO driver that supports
+ * authentication offload. In this case the peer_mac passed is that of
+ * associated STA/GC.
*/
-void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr,
const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp);
/**
@@ -8063,6 +8412,8 @@ void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie,
*
* @sinfo: the station information
* @gfp: allocation flags
+ *
+ * Return: 0 on success. Non-zero on error.
*/
int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp);
@@ -8437,6 +8788,7 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
* @chandef: chandef for the current channel
* @event: type of event
* @gfp: context flags
+ * @link_id: valid link_id for MLO operation or 0 otherwise.
*
* This function is called when a Channel availability check (CAC) is finished
* or aborted. This must be called to notify the completion of a CAC process,
@@ -8444,7 +8796,8 @@ void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
*/
void cfg80211_cac_event(struct net_device *netdev,
const struct cfg80211_chan_def *chandef,
- enum nl80211_radar_event event, gfp_t gfp);
+ enum nl80211_radar_event event, gfp_t gfp,
+ unsigned int link_id);
/**
* cfg80211_background_cac_abort - Channel Availability Check offchan abort event
@@ -8557,6 +8910,34 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy,
}
/**
+ * struct cfg80211_beaconing_check_config - beacon check configuration
+ * @iftype: the interface type to check for
+ * @relax: allow IR-relaxation conditions to apply (e.g. another
+ * interface connected already on the same channel)
+ * NOTE: If this is set, wiphy mutex must be held.
+ * @reg_power: &enum ieee80211_ap_reg_power value indicating the
+ * advertised/used 6 GHz regulatory power setting
+ */
+struct cfg80211_beaconing_check_config {
+ enum nl80211_iftype iftype;
+ enum ieee80211_ap_reg_power reg_power;
+ bool relax;
+};
+
+/**
+ * cfg80211_reg_check_beaconing - check if beaconing is allowed
+ * @wiphy: the wiphy
+ * @chandef: the channel definition
+ * @cfg: additional parameters for the checking
+ *
+ * Return: %true if there is no secondary channel or the secondary channel(s)
+ * can be used for beaconing (i.e. is not a radar channel etc.)
+ */
+bool cfg80211_reg_check_beaconing(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ struct cfg80211_beaconing_check_config *cfg);
+
+/**
* cfg80211_reg_can_beacon - check if beaconing is allowed
* @wiphy: the wiphy
* @chandef: the channel definition
@@ -8565,9 +8946,17 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy,
* Return: %true if there is no secondary channel or the secondary channel(s)
* can be used for beaconing (i.e. is not a radar channel etc.)
*/
-bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
- struct cfg80211_chan_def *chandef,
- enum nl80211_iftype iftype);
+static inline bool
+cfg80211_reg_can_beacon(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype)
+{
+ struct cfg80211_beaconing_check_config config = {
+ .iftype = iftype,
+ };
+
+ return cfg80211_reg_check_beaconing(wiphy, chandef, &config);
+}
/**
* cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation
@@ -8580,34 +8969,41 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
* also checks if IR-relaxation conditions apply, to allow beaconing under
* more permissive conditions.
*
- * Requires the wiphy mutex to be held.
+ * Context: Requires the wiphy mutex to be held.
*/
-bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
- struct cfg80211_chan_def *chandef,
- enum nl80211_iftype iftype);
+static inline bool
+cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
+ struct cfg80211_chan_def *chandef,
+ enum nl80211_iftype iftype)
+{
+ struct cfg80211_beaconing_check_config config = {
+ .iftype = iftype,
+ .relax = true,
+ };
-/*
+ return cfg80211_reg_check_beaconing(wiphy, chandef, &config);
+}
+
+/**
* cfg80211_ch_switch_notify - update wdev channel and notify userspace
* @dev: the device which switched channels
* @chandef: the new channel definition
* @link_id: the link ID for MLO, must be 0 for non-MLO
- * @punct_bitmap: the new puncturing bitmap
*
- * Caller must acquire wdev_lock, therefore must only be called from sleepable
+ * Caller must hold wiphy mutex, therefore must only be called from sleepable
* driver context!
*/
void cfg80211_ch_switch_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id, u16 punct_bitmap);
+ unsigned int link_id);
-/*
+/**
* cfg80211_ch_switch_started_notify - notify channel switch start
* @dev: the device on which the channel switch started
* @chandef: the future channel definition
* @link_id: the link ID for MLO, must be 0 for non-MLO
* @count: the number of TBTTs until the channel switch happens
* @quiet: whether or not immediate quiet was requested by the AP
- * @punct_bitmap: the future puncturing bitmap
*
* Inform the userspace about the channel switch that has just
* started, so that it can take appropriate actions (eg. starting
@@ -8616,7 +9012,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
unsigned int link_id, u8 count,
- bool quiet, u16 punct_bitmap);
+ bool quiet);
/**
* ieee80211_operating_class_to_band - convert operating class to band
@@ -8624,18 +9020,31 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
* @operating_class: the operating class to convert
* @band: band pointer to fill
*
- * Returns %true if the conversion was successful, %false otherwise.
+ * Return: %true if the conversion was successful, %false otherwise.
*/
bool ieee80211_operating_class_to_band(u8 operating_class,
enum nl80211_band *band);
/**
+ * ieee80211_operating_class_to_chandef - convert operating class to chandef
+ *
+ * @operating_class: the operating class to convert
+ * @chan: the ieee80211_channel to convert
+ * @chandef: a pointer to the resulting chandef
+ *
+ * Return: %true if the conversion was successful, %false otherwise.
+ */
+bool ieee80211_operating_class_to_chandef(u8 operating_class,
+ struct ieee80211_channel *chan,
+ struct cfg80211_chan_def *chandef);
+
+/**
* ieee80211_chandef_to_operating_class - convert chandef to operation class
*
* @chandef: the chandef to convert
* @op_class: a pointer to the resulting operating class
*
- * Returns %true if the conversion was successful, %false otherwise.
+ * Return: %true if the conversion was successful, %false otherwise.
*/
bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
u8 *op_class);
@@ -8645,7 +9054,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
*
* @chandef: the chandef to convert
*
- * Returns the center frequency of chandef (1st segment) in KHz.
+ * Return: the center frequency of chandef (1st segment) in KHz.
*/
static inline u32
ieee80211_chandef_to_khz(const struct cfg80211_chan_def *chandef)
@@ -8653,7 +9062,7 @@ ieee80211_chandef_to_khz(const struct cfg80211_chan_def *chandef)
return MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset;
}
-/*
+/**
* cfg80211_tdls_oper_request - request userspace to perform TDLS operation
* @dev: the device on which the operation is requested
* @peer: the MAC address of the peer device
@@ -8672,11 +9081,11 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
enum nl80211_tdls_operation oper,
u16 reason_code, gfp_t gfp);
-/*
+/**
* cfg80211_calculate_bitrate - calculate actual bitrate (in 100Kbps units)
* @rate: given rate_info to calculate bitrate from
*
- * return 0 if MCS index >= 32
+ * Return: calculated bitrate
*/
u32 cfg80211_calculate_bitrate(struct rate_info *rate);
@@ -8690,7 +9099,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate);
* when the driver wishes to unregister the wdev, e.g. when the hardware device
* is unbound from the driver.
*
- * Requires the RTNL and wiphy mutex to be held.
+ * Context: Requires the RTNL and wiphy mutex to be held.
*/
void cfg80211_unregister_wdev(struct wireless_dev *wdev);
@@ -8703,7 +9112,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev);
* held. Otherwise, both register_netdevice() and register_netdev() are usable
* instead as well.
*
- * Requires the RTNL and wiphy mutex to be held.
+ * Context: Requires the RTNL and wiphy mutex to be held.
+ *
+ * Return: 0 on success. Non-zero on error.
*/
int cfg80211_register_netdevice(struct net_device *dev);
@@ -8716,7 +9127,7 @@ int cfg80211_register_netdevice(struct net_device *dev);
* is held. Otherwise, both unregister_netdevice() and unregister_netdev() are
* usable instead as well.
*
- * Requires the RTNL and wiphy mutex to be held.
+ * Context: Requires the RTNL and wiphy mutex to be held.
*/
static inline void cfg80211_unregister_netdevice(struct net_device *dev)
{
@@ -8792,9 +9203,9 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
* correctly, if not the result of using this function will not
* be ordered correctly either, i.e. it does no reordering.
*
- * The function returns the offset where the next part of the
- * buffer starts, which may be @ielen if the entire (remainder)
- * of the buffer should be used.
+ * Return: The offset where the next part of the buffer starts, which
+ * may be @ielen if the entire (remainder) of the buffer should be
+ * used.
*/
size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
const u8 *ids, int n_ids,
@@ -8822,9 +9233,9 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
* correctly, if not the result of using this function will not
* be ordered correctly either, i.e. it does no reordering.
*
- * The function returns the offset where the next part of the
- * buffer starts, which may be @ielen if the entire (remainder)
- * of the buffer should be used.
+ * Return: The offset where the next part of the buffer starts, which
+ * may be @ielen if the entire (remainder) of the buffer should be
+ * used.
*/
static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
const u8 *ids, int n_ids, size_t offset)
@@ -8833,6 +9244,18 @@ static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
}
/**
+ * ieee80211_fragment_element - fragment the last element in skb
+ * @skb: The skbuf that the element was added to
+ * @len_pos: Pointer to length of the element to fragment
+ * @frag_id: The element ID to use for fragments
+ *
+ * This function fragments all data after @len_pos, adding fragmentation
+ * elements with the given ID as appropriate. The SKB will grow in size
+ * accordingly.
+ */
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id);
+
+/**
* cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN
* @wdev: the wireless device reporting the wakeup
* @wakeup: the wakeup report
@@ -8876,6 +9299,8 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy);
* This function can be called by the driver to check whether a
* combination of interfaces and their types are allowed according to
* the interface combinations.
+ *
+ * Return: 0 if combinations are allowed. Non-zero on error.
*/
int cfg80211_check_combinations(struct wiphy *wiphy,
struct iface_combination_params *params);
@@ -8891,6 +9316,8 @@ int cfg80211_check_combinations(struct wiphy *wiphy,
* This function can be called by the driver to check what possible
* combinations it fits in at a given moment, e.g. for channel switching
* purposes.
+ *
+ * Return: 0 on success. Non-zero on error.
*/
int cfg80211_iter_combinations(struct wiphy *wiphy,
struct iface_combination_params *params,
@@ -8898,7 +9325,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
void *data),
void *data);
-/*
+/**
* cfg80211_stop_iface - trigger interface disconnection
*
* @wiphy: the wiphy
@@ -8953,6 +9380,8 @@ static inline void wiphy_ext_feature_set(struct wiphy *wiphy,
*
* The extended features are flagged in multiple bytes (see
* &struct wiphy.@ext_features)
+ *
+ * Return: %true if extended feature flag is set, %false otherwise
*/
static inline bool
wiphy_ext_feature_isset(struct wiphy *wiphy,
@@ -9074,6 +9503,8 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
* Check whether the interface is allowed to operate; additionally, this API
* can be used to check iftype against the software interfaces when
* check_swif is '1'.
+ *
+ * Return: %true if allowed, %false otherwise
*/
bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
bool is_4addr, u8 check_swif);
@@ -9081,9 +9512,9 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
/**
* cfg80211_assoc_comeback - notification of association that was
- * temporarly rejected with a comeback
+ * temporarily rejected with a comeback
* @netdev: network device
- * @ap_addr: AP (MLD) address that rejected the assocation
+ * @ap_addr: AP (MLD) address that rejected the association
* @timeout: timeout interval value TUs.
*
* this function may sleep. the caller must hold the corresponding wdev's mutex.
@@ -9166,75 +9597,81 @@ void cfg80211_bss_flush(struct wiphy *wiphy);
* @cmd: the actual event we want to notify
* @count: the number of TBTTs until the color change happens
* @color_bitmap: representations of the colors that the local BSS is aware of
+ * @link_id: valid link_id in case of MLO or 0 for non-MLO.
+ *
+ * Return: 0 on success. Non-zero on error.
*/
int cfg80211_bss_color_notify(struct net_device *dev,
enum nl80211_commands cmd, u8 count,
- u64 color_bitmap);
+ u64 color_bitmap, u8 link_id);
/**
* cfg80211_obss_color_collision_notify - notify about bss color collision
* @dev: network device
* @color_bitmap: representations of the colors that the local BSS is aware of
+ * @link_id: valid link_id in case of MLO or 0 for non-MLO.
+ *
+ * Return: 0 on success. Non-zero on error.
*/
static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
- u64 color_bitmap)
+ u64 color_bitmap,
+ u8 link_id)
{
return cfg80211_bss_color_notify(dev, NL80211_CMD_OBSS_COLOR_COLLISION,
- 0, color_bitmap);
+ 0, color_bitmap, link_id);
}
/**
* cfg80211_color_change_started_notify - notify color change start
* @dev: the device on which the color is switched
* @count: the number of TBTTs until the color change happens
+ * @link_id: valid link_id in case of MLO or 0 for non-MLO.
*
* Inform the userspace about the color change that has started.
+ *
+ * Return: 0 on success. Non-zero on error.
*/
static inline int cfg80211_color_change_started_notify(struct net_device *dev,
- u8 count)
+ u8 count, u8 link_id)
{
return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_STARTED,
- count, 0);
+ count, 0, link_id);
}
/**
* cfg80211_color_change_aborted_notify - notify color change abort
* @dev: the device on which the color is switched
+ * @link_id: valid link_id in case of MLO or 0 for non-MLO.
*
* Inform the userspace about the color change that has aborted.
+ *
+ * Return: 0 on success. Non-zero on error.
*/
-static inline int cfg80211_color_change_aborted_notify(struct net_device *dev)
+static inline int cfg80211_color_change_aborted_notify(struct net_device *dev,
+ u8 link_id)
{
return cfg80211_bss_color_notify(dev, NL80211_CMD_COLOR_CHANGE_ABORTED,
- 0, 0);
+ 0, 0, link_id);
}
/**
* cfg80211_color_change_notify - notify color change completion
* @dev: the device on which the color was switched
+ * @link_id: valid link_id in case of MLO or 0 for non-MLO.
*
* Inform the userspace about the color change that has completed.
+ *
+ * Return: 0 on success. Non-zero on error.
*/
-static inline int cfg80211_color_change_notify(struct net_device *dev)
+static inline int cfg80211_color_change_notify(struct net_device *dev,
+ u8 link_id)
{
return cfg80211_bss_color_notify(dev,
NL80211_CMD_COLOR_CHANGE_COMPLETED,
- 0, 0);
+ 0, 0, link_id);
}
/**
- * cfg80211_valid_disable_subchannel_bitmap - validate puncturing bitmap
- * @bitmap: bitmap to be validated
- * @chandef: channel definition
- *
- * Validate the puncturing bitmap.
- *
- * Return: %true if the bitmap is valid. %false otherwise.
- */
-bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
- const struct cfg80211_chan_def *chandef);
-
-/**
* cfg80211_links_removed - Notify about removed STA MLD setup links.
* @dev: network device.
* @link_mask: BIT mask of removed STA MLD setup link IDs.
@@ -9247,4 +9684,64 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
*/
void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
+/**
+ * cfg80211_schedule_channels_check - schedule regulatory check if needed
+ * @wdev: the wireless device to check
+ *
+ * In case the device supports NO_IR or DFS relaxations, schedule regulatory
+ * channels check, as previous concurrent operation conditions may not
+ * hold anymore.
+ */
+void cfg80211_schedule_channels_check(struct wireless_dev *wdev);
+
+#ifdef CONFIG_CFG80211_DEBUGFS
+/**
+ * wiphy_locked_debugfs_read - do a locked read in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being read
+ * @buf: the buffer to fill and then read from
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy to
+ * @count: read count
+ * @ppos: read position
+ * @handler: the read handler to call (under wiphy lock)
+ * @data: additional data to pass to the read handler
+ *
+ * Return: the number of characters read, or a negative errno
+ */
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsize,
+ char __user *userbuf, size_t count,
+ loff_t *ppos,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t bufsize,
+ void *data),
+ void *data);
+
+/**
+ * wiphy_locked_debugfs_write - do a locked write in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being written to
+ * @buf: the buffer to copy the user data to
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy from
+ * @count: read count
+ * @handler: the write handler to call (under wiphy lock)
+ * @data: additional data to pass to the write handler
+ *
+ * Return: the number of characters written, or a negative errno
+ */
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsize,
+ const char __user *userbuf, size_t count,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data),
+ void *data);
+#endif
+
#endif /* __NET_CFG80211_H */
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index 519d23941b54..76d2cd2e2b30 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -20,6 +20,7 @@ struct wpan_phy;
struct wpan_phy_cca;
struct cfg802154_scan_request;
struct cfg802154_beacon_request;
+struct ieee802154_addr;
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
struct ieee802154_llsec_device_key;
@@ -77,6 +78,12 @@ struct cfg802154_ops {
struct cfg802154_beacon_request *request);
int (*stop_beacons)(struct wpan_phy *wpan_phy,
struct wpan_dev *wpan_dev);
+ int (*associate)(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *coord);
+ int (*disassociate)(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target);
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
void (*get_llsec_table)(struct wpan_phy *wpan_phy,
struct wpan_dev *wpan_dev,
@@ -304,6 +311,22 @@ struct ieee802154_coord_desc {
};
/**
+ * struct ieee802154_pan_device - PAN device information
+ * @pan_id: the PAN ID of this device
+ * @mode: the preferred mode to reach the device
+ * @short_addr: the short address of this device
+ * @extended_addr: the extended address of this device
+ * @node: the list node
+ */
+struct ieee802154_pan_device {
+ __le16 pan_id;
+ u8 mode;
+ __le16 short_addr;
+ __le64 extended_addr;
+ struct list_head node;
+};
+
+/**
* struct cfg802154_scan_request - Scan request
*
* @type: type of scan to be performed
@@ -479,6 +502,13 @@ struct wpan_dev {
/* fallback for acknowledgment bit setting */
bool ackreq;
+
+ /* Associations */
+ struct mutex association_lock;
+ struct ieee802154_pan_device *parent;
+ struct list_head children;
+ unsigned int max_associations;
+ unsigned int nchildren;
};
#define to_phy(_dev) container_of(_dev, struct wpan_phy, dev)
@@ -530,4 +560,46 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy)
void ieee802154_configure_durations(struct wpan_phy *phy,
unsigned int page, unsigned int channel);
+/**
+ * cfg802154_device_is_associated - Checks whether we are associated to any device
+ * @wpan_dev: the wpan device
+ * @return: true if we are associated
+ */
+bool cfg802154_device_is_associated(struct wpan_dev *wpan_dev);
+
+/**
+ * cfg802154_device_is_parent - Checks if a device is our coordinator
+ * @wpan_dev: the wpan device
+ * @target: the expected parent
+ * @return: true if @target is our coordinator
+ */
+bool cfg802154_device_is_parent(struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target);
+
+/**
+ * cfg802154_device_is_child - Checks whether a device is associated to us
+ * @wpan_dev: the wpan device
+ * @target: the expected child
+ * @return: the PAN device
+ */
+struct ieee802154_pan_device *
+cfg802154_device_is_child(struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target);
+
+/**
+ * cfg802154_set_max_associations - Limit the number of future associations
+ * @wpan_dev: the wpan device
+ * @max: the maximum number of devices we accept to associate
+ * @return: the old maximum value
+ */
+unsigned int cfg802154_set_max_associations(struct wpan_dev *wpan_dev,
+ unsigned int max);
+
+/**
+ * cfg802154_get_free_short_addr - Get a free address among the known devices
+ * @wpan_dev: the wpan device
+ * @return: a random short address expectedly unused on our PAN
+ */
+__le16 cfg802154_get_free_short_addr(struct wpan_dev *wpan_dev);
+
#endif /* __NET_CFG802154_H */
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 1338cb92c8e7..28b101f26636 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -158,7 +158,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr);
+ __wsum diff, bool pseudohdr, bool ipv6);
static __always_inline
void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 53dd7d988a2d..d6780d7903f4 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -28,7 +28,7 @@
#include <net/request_sock.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* known doi values */
#define CIPSO_V4_DOI_UNKNOWN 0x00000000
@@ -183,7 +183,8 @@ int cipso_v4_getattr(const unsigned char *cipso,
struct netlbl_lsm_secattr *secattr);
int cipso_v4_sock_setattr(struct sock *sk,
const struct cipso_v4_doi *doi_def,
- const struct netlbl_lsm_secattr *secattr);
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked);
void cipso_v4_sock_delattr(struct sock *sk);
int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr);
int cipso_v4_req_setattr(struct request_sock *req,
@@ -214,7 +215,8 @@ static inline int cipso_v4_getattr(const unsigned char *cipso,
static inline int cipso_v4_sock_setattr(struct sock *sk,
const struct cipso_v4_doi *doi_def,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
return -ENOSYS;
}
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 29fd1b4ee654..db5eff6cb60f 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -150,6 +150,7 @@ struct devlink_port {
struct devlink_rate *devlink_rate;
struct devlink_linecard *linecard;
+ u32 rel_index;
};
struct devlink_port_new_attrs {
@@ -351,7 +352,7 @@ struct devlink_dpipe_table {
bool resource_valid;
u64 resource_id;
u64 resource_units;
- struct devlink_dpipe_table_ops *table_ops;
+ const struct devlink_dpipe_table_ops *table_ops;
struct rcu_head rcu;
};
@@ -482,7 +483,8 @@ struct devlink_param {
int (*get)(struct devlink *devlink, u32 id,
struct devlink_param_gset_ctx *ctx);
int (*set)(struct devlink *devlink, u32 id,
- struct devlink_param_gset_ctx *ctx);
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack);
int (*validate)(struct devlink *devlink, u32 id,
union devlink_param_value val,
struct netlink_ext_ack *extack);
@@ -598,12 +600,14 @@ enum devlink_param_generic_id {
.validate = _validate, \
}
-/* Part number, identifier of board design */
+/* Identifier of board design */
#define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID "board.id"
/* Revision of board design */
#define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV "board.rev"
/* Maker of the board */
#define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture"
+/* Part number of the board and its components */
+#define DEVLINK_INFO_VERSION_GENERIC_BOARD_PART_NUMBER "board.part_number"
/* Part number, identifier of asic design */
#define DEVLINK_INFO_VERSION_GENERIC_ASIC_ID "asic.id"
@@ -1601,6 +1605,14 @@ void devlink_free(struct devlink *devlink);
* capability. Should be used by device drivers to
* enable/disable ipsec_packet capability of a
* function managed by the devlink port.
+ * @port_fn_max_io_eqs_get: Callback used to get port function's maximum number
+ * of event queues. Should be used by device drivers to
+ * report the maximum event queues of a function
+ * managed by the devlink port.
+ * @port_fn_max_io_eqs_set: Callback used to set port function's maximum number
+ * of event queues. Should be used by device drivers to
+ * configure maximum number of event queues
+ * of a function managed by the devlink port.
*
* Note: Driver should return -EOPNOTSUPP if it doesn't support
* port function (@port_fn_*) handling for a particular port.
@@ -1650,6 +1662,12 @@ struct devlink_port_ops {
int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port,
bool enable,
struct netlink_ext_ack *extack);
+ int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port,
+ u32 *max_eqs,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port,
+ u32 max_eqs,
+ struct netlink_ext_ack *extack);
};
void devlink_port_init(struct devlink *devlink,
@@ -1697,6 +1715,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
+int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
+ struct devlink *fn_devlink);
struct devlink_rate *
devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
struct devlink_rate *parent);
@@ -1717,8 +1737,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard);
void devlink_linecard_provision_fail(struct devlink_linecard *linecard);
void devlink_linecard_activate(struct devlink_linecard *linecard);
void devlink_linecard_deactivate(struct devlink_linecard *linecard);
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
- struct devlink *nested_devlink);
+int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink);
int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
@@ -1731,7 +1751,7 @@ void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index);
void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
int devl_dpipe_table_register(struct devlink *devlink,
const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
+ const struct devlink_dpipe_table_ops *table_ops,
void *priv, bool counter_control_extern);
void devl_dpipe_table_unregister(struct devlink *devlink,
const char *table_name);
@@ -1851,36 +1871,36 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req,
const char *version_value,
enum devlink_info_version_type version_type);
-int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
-int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
-
-int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name);
-int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg);
-
-int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name);
-int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg);
-int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name);
-int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg);
-
-int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value);
-int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value);
-int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
- u16 value_len);
-
-int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
- bool value);
-int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u8 value);
-int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u32 value);
-int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u64 value);
-int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const char *value);
-int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const void *value, u32 value_len);
+void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
+void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
+
+void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name);
+void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg);
+
+void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name);
+void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg);
+void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name);
+void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg);
+
+void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value);
+void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value);
+void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len);
+
+void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value);
+void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value);
+void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value);
+void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value);
+void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value);
+void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u32 value_len);
struct devlink_health_reporter *
devl_port_health_reporter_create(struct devlink_port *port,
@@ -1918,6 +1938,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
void
devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
+int devl_nested_devlink_set(struct devlink *devlink,
+ struct devlink *nested_devlink);
bool devlink_is_reload_failed(const struct devlink *devlink);
void devlink_remote_reload_actions_performed(struct devlink *devlink,
enum devlink_reload_limit limit,
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a587e83fc169..4748680e8c88 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -20,11 +20,17 @@
FN(IP_NOPROTO) \
FN(SOCKET_RCVBUFF) \
FN(PROTO_MEM) \
+ FN(TCP_AUTH_HDR) \
FN(TCP_MD5NOTFOUND) \
FN(TCP_MD5UNEXPECTED) \
FN(TCP_MD5FAILURE) \
+ FN(TCP_AONOTFOUND) \
+ FN(TCP_AOUNEXPECTED) \
+ FN(TCP_AOKEYNOTFOUND) \
+ FN(TCP_AOFAILURE) \
FN(SOCKET_BACKLOG) \
FN(TCP_FLAGS) \
+ FN(TCP_ABORT_ON_DATA) \
FN(TCP_ZEROWINDOW) \
FN(TCP_OLD_DATA) \
FN(TCP_OVERWINDOW) \
@@ -32,6 +38,7 @@
FN(TCP_RFC7323_PAWS) \
FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
+ FN(TCP_INVALID_ACK_SEQUENCE) \
FN(TCP_RESET) \
FN(TCP_INVALID_SYN) \
FN(TCP_CLOSE) \
@@ -49,6 +56,7 @@
FN(NEIGH_QUEUEFULL) \
FN(NEIGH_DEAD) \
FN(TC_EGRESS) \
+ FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
FN(CPU_BACKLOG) \
FN(XDP) \
@@ -80,6 +88,10 @@
FN(IPV6_NDISC_BAD_OPTIONS) \
FN(IPV6_NDISC_NS_OTHERHOST) \
FN(QUEUE_PURGE) \
+ FN(TC_COOKIE_ERROR) \
+ FN(PACKET_SOCK_ERROR) \
+ FN(TC_CHAIN_NOTFOUND) \
+ FN(TC_RECLASSIFY_LOOP) \
FNe(MAX)
/**
@@ -96,7 +108,13 @@ enum skb_drop_reason {
SKB_CONSUMED,
/** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
SKB_DROP_REASON_NOT_SPECIFIED,
- /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
+ /**
+ * @SKB_DROP_REASON_NO_SOCKET: no valid socket that can be used.
+ * Reason could be one of three cases:
+ * 1) no established/listening socket found during lookup process
+ * 2) no valid request socket during 3WHS process
+ * 3) no valid child socket during 3WHS process
+ */
SKB_DROP_REASON_NO_SOCKET,
/** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
SKB_DROP_REASON_PKT_TOO_SMALL,
@@ -137,11 +155,16 @@ enum skb_drop_reason {
/** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
SKB_DROP_REASON_SOCKET_RCVBUFF,
/**
- * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet
- * drop out of udp_memory_allocated.
+ * @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as
+ * udp packet drop out of udp_memory_allocated.
*/
SKB_DROP_REASON_PROTO_MEM,
/**
+ * @SKB_DROP_REASON_TCP_AUTH_HDR: TCP-MD5 or TCP-AO hashes are met
+ * twice or set incorrectly.
+ */
+ SKB_DROP_REASON_TCP_AUTH_HDR,
+ /**
* @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected,
* corresponding to LINUX_MIB_TCPMD5NOTFOUND
*/
@@ -157,6 +180,26 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_TCP_MD5FAILURE,
/**
+ * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected,
+ * corresponding to LINUX_MIB_TCPAOREQUIRED
+ */
+ SKB_DROP_REASON_TCP_AONOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TCP_AOUNEXPECTED: TCP-AO hash is present and it
+ * was not expected, corresponding to LINUX_MIB_TCPAOKEYNOTFOUND
+ */
+ SKB_DROP_REASON_TCP_AOUNEXPECTED,
+ /**
+ * @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown,
+ * corresponding to LINUX_MIB_TCPAOKEYNOTFOUND
+ */
+ SKB_DROP_REASON_TCP_AOKEYNOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong,
+ * corresponding to LINUX_MIB_TCPAOBAD
+ */
+ SKB_DROP_REASON_TCP_AOFAILURE,
+ /**
* @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog (
* see LINUX_MIB_TCPBACKLOGDROP)
*/
@@ -164,12 +207,17 @@ enum skb_drop_reason {
/** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
SKB_DROP_REASON_TCP_FLAGS,
/**
+ * @SKB_DROP_REASON_TCP_ABORT_ON_DATA: abort on data, corresponding to
+ * LINUX_MIB_TCPABORTONDATA
+ */
+ SKB_DROP_REASON_TCP_ABORT_ON_DATA,
+ /**
* @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
* see LINUX_MIB_TCPZEROWINDOWDROP
*/
SKB_DROP_REASON_TCP_ZEROWINDOW,
/**
- * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already
+ * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received is already
* received before (spurious retrans may happened), see
* LINUX_MIB_DELAYEDACKLOST
*/
@@ -187,13 +235,19 @@ enum skb_drop_reason {
SKB_DROP_REASON_TCP_OFOMERGE,
/**
* @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
- * LINUX_MIB_PAWSESTABREJECTED
+ * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
+ /**
+ * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
+ * field because ack sequence is not in the window between snd_una
+ * and snd_nxt
+ */
+ SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE,
/** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
SKB_DROP_REASON_TCP_RESET,
/**
@@ -237,6 +291,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_NEIGH_DEAD,
/** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
SKB_DROP_REASON_TC_EGRESS,
+ /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
+ SKB_DROP_REASON_SECURITY_HOOK,
/**
* @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
* failed to enqueue to current qdisc)
@@ -346,6 +402,23 @@ enum skb_drop_reason {
/** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
SKB_DROP_REASON_QUEUE_PURGE,
/**
+ * @SKB_DROP_REASON_TC_COOKIE_ERROR: An error occurred whilst
+ * processing a tc ext cookie.
+ */
+ SKB_DROP_REASON_TC_COOKIE_ERROR,
+ /**
+ * @SKB_DROP_REASON_PACKET_SOCK_ERROR: generic packet socket errors
+ * after its filter matches an incoming packet.
+ */
+ SKB_DROP_REASON_PACKET_SOCK_ERROR,
+ /** @SKB_DROP_REASON_TC_CHAIN_NOTFOUND: tc chain lookup failed. */
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TC_RECLASSIFY_LOOP: tc exceeded max reclassify loop
+ * iterations.
+ */
+ SKB_DROP_REASON_TC_RECLASSIFY_LOOP,
+ /**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
*/
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 0b9c6aa27047..d7a6c2930277 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -24,9 +24,6 @@
struct dsa_8021q_context;
struct tc_action;
-struct phy_device;
-struct fixed_phy_status;
-struct phylink_link_state;
#define DSA_TAG_PROTO_NONE_VALUE 0
#define DSA_TAG_PROTO_BRCM_VALUE 1
@@ -56,6 +53,7 @@ struct phylink_link_state;
#define DSA_TAG_PROTO_RTL8_4T_VALUE 25
#define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26
#define DSA_TAG_PROTO_LAN937X_VALUE 27
+#define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
@@ -86,6 +84,7 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE,
DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE,
DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE,
+ DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE,
};
struct dsa_switch;
@@ -102,11 +101,11 @@ struct dsa_device_ops {
const char *name;
enum dsa_tag_protocol proto;
/* Some tagging protocols either mangle or shift the destination MAC
- * address, in which case the DSA master would drop packets on ingress
+ * address, in which case the DSA conduit would drop packets on ingress
* if what it understands out of the destination MAC address is not in
* its RX filter.
*/
- bool promisc_on_master;
+ bool promisc_on_conduit;
};
struct dsa_lag {
@@ -236,12 +235,12 @@ struct dsa_bridge {
};
struct dsa_port {
- /* A CPU port is physically connected to a master device.
- * A user port exposed to userspace has a slave device.
+ /* A CPU port is physically connected to a conduit device. A user port
+ * exposes a network device to user-space, called 'user' here.
*/
union {
- struct net_device *master;
- struct net_device *slave;
+ struct net_device *conduit;
+ struct net_device *user;
};
/* Copy of the tagging protocol operations, for quicker access
@@ -249,7 +248,7 @@ struct dsa_port {
*/
const struct dsa_device_ops *tag_ops;
- /* Copies for faster access in master receive hot path */
+ /* Copies for faster access in conduit receive hot path */
struct dsa_switch_tree *dst;
struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
@@ -281,9 +280,9 @@ struct dsa_port {
u8 lag_tx_enabled:1;
- /* Master state bits, valid only on CPU ports */
- u8 master_admin_up:1;
- u8 master_oper_up:1;
+ /* conduit state bits, valid only on CPU ports */
+ u8 conduit_admin_up:1;
+ u8 conduit_oper_up:1;
/* Valid only on user ports */
u8 cpu_port_in_lag:1;
@@ -303,7 +302,7 @@ struct dsa_port {
struct list_head list;
/*
- * Original copy of the master netdev ethtool_ops
+ * Original copy of the conduit netdev ethtool_ops
*/
const struct ethtool_ops *orig_ethtool_ops;
@@ -327,6 +326,12 @@ struct dsa_port {
};
};
+static inline struct dsa_port *
+dsa_phylink_to_port(struct phylink_config *config)
+{
+ return container_of(config, struct dsa_port, pl_config);
+}
+
/* TODO: ideally DSA ports would have a single dp->link_dp member,
* and no dst->rtable nor this struct dsa_link would be needed,
* but this would require some more complex tree walking,
@@ -398,14 +403,18 @@ struct dsa_switch {
*/
u32 configure_vlan_while_not_filtering:1;
- /* If the switch driver always programs the CPU port as egress tagged
- * despite the VLAN configuration indicating otherwise, then setting
- * @untag_bridge_pvid will force the DSA receive path to pop the
- * bridge's default_pvid VLAN tagged frames to offer a consistent
- * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge
- * device.
+ /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames.
+ * DEPRECATED: Do NOT set this field in new drivers. Instead look at
+ * the dsa_software_vlan_untag() comments.
*/
u32 untag_bridge_pvid:1;
+ /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames.
+ * Useful if the switch cannot preserve the VLAN tag as seen on the
+ * wire for user port ingress, and chooses to send all frames as
+ * VLAN-tagged to the CPU, including those which were originally
+ * untagged.
+ */
+ u32 untag_vlan_aware_bridge_pvid:1;
/* Let DSA manage the FDB entries towards the
* CPU, based on the software bridge database.
@@ -430,6 +439,11 @@ struct dsa_switch {
*/
u32 fdb_isolation:1;
+ /* Drivers that have global DSCP mapping settings must set this to
+ * true to automatically apply the settings to all ports.
+ */
+ u32 dscp_prio_mapping_is_global:1;
+
/* Listener for switch fabric events */
struct notifier_block nb;
@@ -452,10 +466,15 @@ struct dsa_switch {
const struct dsa_switch_ops *ops;
/*
- * Slave mii_bus and devices for the individual ports.
+ * Allow a DSA switch driver to override the phylink MAC ops
+ */
+ const struct phylink_mac_ops *phylink_mac_ops;
+
+ /*
+ * User mii_bus and devices for the individual ports.
*/
u32 phys_mii_mask;
- struct mii_bus *slave_mii_bus;
+ struct mii_bus *user_mii_bus;
/* Ageing Time limits in msecs */
unsigned int ageing_time_min;
@@ -520,10 +539,10 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp)
return dp->type == DSA_PORT_TYPE_UNUSED;
}
-static inline bool dsa_port_master_is_operational(struct dsa_port *dp)
+static inline bool dsa_port_conduit_is_operational(struct dsa_port *dp)
{
- return dsa_port_is_cpu(dp) && dp->master_admin_up &&
- dp->master_oper_up;
+ return dsa_port_is_cpu(dp) && dp->conduit_admin_up &&
+ dp->conduit_oper_up;
}
static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
@@ -578,6 +597,10 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p)
dsa_switch_for_each_port((_dp), (_ds)) \
if (dsa_port_is_user((_dp)))
+#define dsa_switch_for_each_user_port_continue_reverse(_dp, _ds) \
+ dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \
+ if (dsa_port_is_user((_dp)))
+
#define dsa_switch_for_each_cpu_port(_dp, _ds) \
dsa_switch_for_each_port((_dp), (_ds)) \
if (dsa_port_is_cpu((_dp)))
@@ -713,12 +736,12 @@ static inline bool dsa_port_offloads_lag(struct dsa_port *dp,
return dsa_port_lag_dev_get(dp) == lag->dev;
}
-static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp)
+static inline struct net_device *dsa_port_to_conduit(const struct dsa_port *dp)
{
if (dp->cpu_port_in_lag)
return dsa_port_lag_dev_get(dp->cpu_dp);
- return dp->cpu_dp->master;
+ return dp->cpu_dp->conduit;
}
static inline
@@ -732,7 +755,7 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
else if (dp->hsr_dev)
return dp->hsr_dev;
- return dp->slave;
+ return dp->user;
}
static inline struct net_device *
@@ -834,9 +857,9 @@ struct dsa_switch_ops {
int (*connect_tag_protocol)(struct dsa_switch *ds,
enum dsa_tag_protocol proto);
- int (*port_change_master)(struct dsa_switch *ds, int port,
- struct net_device *master,
- struct netlink_ext_ack *extack);
+ int (*port_change_conduit)(struct dsa_switch *ds, int port,
+ struct net_device *conduit,
+ struct netlink_ext_ack *extack);
/* Optional switch-wide initialization and destruction methods */
int (*setup)(struct dsa_switch *ds);
@@ -858,14 +881,6 @@ struct dsa_switch_ops {
int regnum, u16 val);
/*
- * Link state adjustment (called from libphy)
- */
- void (*adjust_link)(struct dsa_switch *ds, int port,
- struct phy_device *phydev);
- void (*fixed_link_update)(struct dsa_switch *ds, int port,
- struct fixed_phy_status *st);
-
- /*
* PHYLINK integration
*/
void (*phylink_get_caps)(struct dsa_switch *ds, int port,
@@ -873,15 +888,9 @@ struct dsa_switch_ops {
struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
int port,
phy_interface_t iface);
- int (*phylink_mac_prepare)(struct dsa_switch *ds, int port,
- unsigned int mode,
- phy_interface_t interface);
void (*phylink_mac_config)(struct dsa_switch *ds, int port,
unsigned int mode,
const struct phylink_link_state *state);
- int (*phylink_mac_finish)(struct dsa_switch *ds, int port,
- unsigned int mode,
- phy_interface_t interface);
void (*phylink_mac_link_down)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
@@ -931,7 +940,7 @@ struct dsa_switch_ops {
* ethtool timestamp info
*/
int (*get_ts_info)(struct dsa_switch *ds, int port,
- struct ethtool_ts_info *ts);
+ struct kernel_ethtool_ts_info *ts);
/*
* ethtool MAC merge layer
@@ -955,6 +964,10 @@ struct dsa_switch_ops {
u8 prio);
int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp,
u8 prio);
+ int (*port_set_apptrust)(struct dsa_switch *ds, int port,
+ const u8 *sel, int nsel);
+ int (*port_get_apptrust)(struct dsa_switch *ds, int port, u8 *sel,
+ int *nsel);
/*
* Suspend and resume
@@ -969,6 +982,16 @@ struct dsa_switch_ops {
struct phy_device *phy);
void (*port_disable)(struct dsa_switch *ds, int port);
+
+ /*
+ * Notification for MAC address changes on user ports. Drivers can
+ * currently only veto operations. They should not use the method to
+ * program the hardware, since the operation is not rolled back in case
+ * of other errors.
+ */
+ int (*port_set_mac_address)(struct dsa_switch *ds, int port,
+ const unsigned char *addr);
+
/*
* Compatibility between device trees defining multiple CPU ports and
* drivers which are not OK to use by default the numerically smallest
@@ -981,9 +1004,9 @@ struct dsa_switch_ops {
* Port's MAC EEE settings
*/
int (*set_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_eee *e);
+ struct ethtool_keee *e);
int (*get_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_eee *e);
+ struct ethtool_keee *e);
/* EEPROM access */
int (*get_eeprom_len)(struct dsa_switch *ds);
@@ -1198,7 +1221,8 @@ struct dsa_switch_ops {
* HSR integration
*/
int (*port_hsr_join)(struct dsa_switch *ds, int port,
- struct net_device *hsr);
+ struct net_device *hsr,
+ struct netlink_ext_ack *extack);
int (*port_hsr_leave)(struct dsa_switch *ds, int port,
struct net_device *hsr);
@@ -1222,11 +1246,11 @@ struct dsa_switch_ops {
int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
/*
- * DSA master tracking operations
+ * DSA conduit tracking operations
*/
- void (*master_state_change)(struct dsa_switch *ds,
- const struct net_device *master,
- bool operational);
+ void (*conduit_state_change)(struct dsa_switch *ds,
+ const struct net_device *conduit,
+ bool operational);
};
#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \
@@ -1236,7 +1260,8 @@ struct dsa_switch_ops {
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx);
int dsa_devlink_param_set(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx);
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack);
int dsa_devlink_params_register(struct dsa_switch *ds,
const struct devlink_param *params,
size_t params_count);
@@ -1363,9 +1388,9 @@ static inline int dsa_switch_resume(struct dsa_switch *ds)
#endif /* CONFIG_PM_SLEEP */
#if IS_ENABLED(CONFIG_NET_DSA)
-bool dsa_slave_dev_check(const struct net_device *dev);
+bool dsa_user_dev_check(const struct net_device *dev);
#else
-static inline bool dsa_slave_dev_check(const struct net_device *dev)
+static inline bool dsa_user_dev_check(const struct net_device *dev)
{
return false;
}
diff --git a/include/net/dsa_stubs.h b/include/net/dsa_stubs.h
index 361811750a54..6f384897f287 100644
--- a/include/net/dsa_stubs.h
+++ b/include/net/dsa_stubs.h
@@ -13,14 +13,14 @@
extern const struct dsa_stubs *dsa_stubs;
struct dsa_stubs {
- int (*master_hwtstamp_validate)(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack);
+ int (*conduit_hwtstamp_validate)(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
};
-static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack)
+static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
if (!netdev_uses_dsa(dev))
return 0;
@@ -29,18 +29,18 @@ static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
* netdev_uses_dsa() returns true, the dsa_core module is still
* registered, and so, dsa_unregister_stubs() couldn't have run.
* For netdev_uses_dsa() to start returning false, it would imply that
- * dsa_master_teardown() has executed, which requires rtnl_lock().
+ * dsa_conduit_teardown() has executed, which requires rtnl_lock().
*/
ASSERT_RTNL();
- return dsa_stubs->master_hwtstamp_validate(dev, config, extack);
+ return dsa_stubs->conduit_hwtstamp_validate(dev, config, extack);
}
#else
-static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack)
+static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
return 0;
}
diff --git a/include/net/dscp.h b/include/net/dscp.h
new file mode 100644
index 000000000000..ba40540868c9
--- /dev/null
+++ b/include/net/dscp.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> */
+
+#ifndef __DSCP_H__
+#define __DSCP_H__
+
+/*
+ * DSCP Pools and Codepoint Space Division:
+ *
+ * The Differentiated Services (Diffserv) architecture defines a method for
+ * classifying and managing network traffic using the DS field in IPv4 and IPv6
+ * packet headers. This field can carry one of 64 distinct DSCP (Differentiated
+ * Services Code Point) values, which are divided into three pools based on
+ * their Least Significant Bits (LSB) patterns and intended usage. Each pool has
+ * a specific registration procedure for assigning DSCP values:
+ *
+ * Pool 1 (Standards Action Pool):
+ * - Codepoint Space: xxxxx0
+ * This pool includes DSCP values ending in '0' (binary), allocated via
+ * Standards Action. It is intended for globally recognized traffic classes,
+ * ensuring interoperability across the internet. This pool encompasses
+ * well-known DSCP values such as CS0-CS7, AFxx, EF, and VOICE-ADMIT.
+ *
+ * Pool 2 (Experimental/Local Use Pool):
+ * - Codepoint Space: xxxx11
+ * Reserved for DSCP values ending in '11' (binary), this pool is designated
+ * for Experimental or Local Use. It allows for private or temporary traffic
+ * marking schemes not intended for standardized global use, facilitating
+ * testing and network-specific configurations without impacting
+ * interoperability.
+ *
+ * Pool 3 (Preferential Standardization Pool):
+ * - Codepoint Space: xxxx01
+ * Initially reserved for experimental or local use, this pool now serves as
+ * a secondary standardization resource should Pool 1 become exhausted. DSCP
+ * values ending in '01' (binary) are assigned via Standards Action, with a
+ * focus on adopting new, standardized traffic classes as the need arises.
+ *
+ * For pool updates see:
+ * https://www.iana.org/assignments/dscp-registry/dscp-registry.xhtml
+ */
+
+/* Pool 1: Standardized DSCP values as per [RFC8126] */
+#define DSCP_CS0 0 /* 000000, [RFC2474] */
+/* CS0 is some times called default (DF) */
+#define DSCP_DF 0 /* 000000, [RFC2474] */
+#define DSCP_CS1 8 /* 001000, [RFC2474] */
+#define DSCP_CS2 16 /* 010000, [RFC2474] */
+#define DSCP_CS3 24 /* 011000, [RFC2474] */
+#define DSCP_CS4 32 /* 100000, [RFC2474] */
+#define DSCP_CS5 40 /* 101000, [RFC2474] */
+#define DSCP_CS6 48 /* 110000, [RFC2474] */
+#define DSCP_CS7 56 /* 111000, [RFC2474] */
+#define DSCP_AF11 10 /* 001010, [RFC2597] */
+#define DSCP_AF12 12 /* 001100, [RFC2597] */
+#define DSCP_AF13 14 /* 001110, [RFC2597] */
+#define DSCP_AF21 18 /* 010010, [RFC2597] */
+#define DSCP_AF22 20 /* 010100, [RFC2597] */
+#define DSCP_AF23 22 /* 010110, [RFC2597] */
+#define DSCP_AF31 26 /* 011010, [RFC2597] */
+#define DSCP_AF32 28 /* 011100, [RFC2597] */
+#define DSCP_AF33 30 /* 011110, [RFC2597] */
+#define DSCP_AF41 34 /* 100010, [RFC2597] */
+#define DSCP_AF42 36 /* 100100, [RFC2597] */
+#define DSCP_AF43 38 /* 100110, [RFC2597] */
+#define DSCP_EF 46 /* 101110, [RFC3246] */
+#define DSCP_VOICE_ADMIT 44 /* 101100, [RFC5865] */
+
+/* Pool 3: Standardized assignments, previously available for experimental/local
+ * use
+ */
+#define DSCP_LE 1 /* 000001, [RFC8622] */
+
+#define DSCP_MAX 64
+
+#endif /* __DSCP_H__ */
diff --git a/include/net/dst.h b/include/net/dst.h
index 78884429deed..08647c99d79c 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -222,13 +222,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr
return msecs_to_jiffies(dst_metric(dst, metric));
}
-static inline u32
-dst_allfrag(const struct dst_entry *dst)
-{
- int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG);
- return ret;
-}
-
static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
@@ -348,7 +341,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
skb->dev = dev;
/*
- * Clear hash so that we can recalulate the hash for the
+ * Clear hash so that we can recalculate the hash for the
* encapsulated packet, unless we have already determine the hash
* over the L4 4-tuple.
*/
@@ -392,12 +385,11 @@ static inline int dst_discard(struct sk_buff *skb)
{
return dst_discard_out(&init_net, skb->sk, skb);
}
-void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
- struct net_device *dev, int initial_ref, int initial_obsolete,
+ struct net_device *dev, int initial_obsolete,
unsigned short flags);
-struct dst_entry *dst_destroy(struct dst_entry *dst);
void dst_dev_put(struct dst_entry *dst);
static inline void dst_confirm(struct dst_entry *dst)
@@ -448,6 +440,15 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
dst->expires = expires;
}
+static inline unsigned int dst_dev_overhead(struct dst_entry *dst,
+ struct sk_buff *skb)
+{
+ if (likely(dst))
+ return LL_RESERVED_SPACE(dst->dev);
+
+ return skb->mac_len;
+}
+
INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index df6622a5fe98..1961699598e2 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -76,7 +76,7 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
*/
static inline void dst_cache_reset(struct dst_cache *dst_cache)
{
- dst_cache->reset_ts = jiffies;
+ WRITE_ONCE(dst_cache->reset_ts, jiffies);
}
/**
@@ -102,7 +102,7 @@ int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
* @dst_cache: the cache
*
* No synchronization is enforced: it must be called only when the cache
- * is unsed.
+ * is unused.
*/
void dst_cache_destroy(struct dst_cache *dst_cache);
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 1b7fae4c6b24..84c15402931c 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -163,8 +163,11 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
if (!new_md)
return ERR_PTR(-ENOMEM);
- memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
- sizeof(struct ip_tunnel_info) + md_size);
+ unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+ sizeof(struct ip_tunnel_info) + md_size,
+ /* metadata_dst_alloc() reserves room (md_size bytes) for
+ * options right after the ip_tunnel_info struct.
+ */);
#ifdef CONFIG_DST_CACHE
/* Unclone the dst cache if there is one */
if (new_md->u.tun_info.dst_cache.cache) {
@@ -198,7 +201,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr,
__be32 daddr,
__u8 tos, __u8 ttl,
__be16 tp_dst,
- __be16 flags,
+ const unsigned long *flags,
__be64 tunnel_id,
int md_size)
{
@@ -215,7 +218,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr,
}
static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
- __be16 flags,
+ const unsigned long *flags,
__be64 tunnel_id,
int md_size)
{
@@ -230,7 +233,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad
__u8 tos, __u8 ttl,
__be16 tp_dst,
__be32 label,
- __be16 flags,
+ const unsigned long *flags,
__be64 tunnel_id,
int md_size)
{
@@ -243,7 +246,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad
info = &tun_dst->u.tun_info;
info->mode = IP_TUNNEL_INFO_IPV6;
- info->key.tun_flags = flags;
+ ip_tunnel_flags_copy(info->key.tun_flags, flags);
info->key.tun_id = tunnel_id;
info->key.tp_src = 0;
info->key.tp_dst = tp_dst;
@@ -259,7 +262,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad
}
static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
- __be16 flags,
+ const unsigned long *flags,
__be64 tunnel_id,
int md_size)
{
diff --git a/include/net/eee.h b/include/net/eee.h
new file mode 100644
index 000000000000..84837aba3cd9
--- /dev/null
+++ b/include/net/eee.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _EEE_H
+#define _EEE_H
+
+#include <linux/types.h>
+
+struct eee_config {
+ u32 tx_lpi_timer;
+ bool tx_lpi_enabled;
+ bool eee_enabled;
+};
+
+static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg)
+{
+ /* eee_enabled is the master on/off */
+ if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled)
+ return false;
+
+ return true;
+}
+
+static inline void eeecfg_to_eee(struct ethtool_keee *eee,
+ const struct eee_config *eeecfg)
+{
+ eee->tx_lpi_timer = eeecfg->tx_lpi_timer;
+ eee->tx_lpi_enabled = eeecfg->tx_lpi_enabled;
+ eee->eee_enabled = eeecfg->eee_enabled;
+}
+
+static inline void eee_to_eeecfg(struct eee_config *eeecfg,
+ const struct ethtool_keee *eee)
+{
+ eeecfg->tx_lpi_timer = eee->tx_lpi_timer;
+ eeecfg->tx_lpi_enabled = eee->tx_lpi_enabled;
+ eeecfg->eee_enabled = eee->eee_enabled;
+}
+
+#endif
diff --git a/include/net/erspan.h b/include/net/erspan.h
index 6cb4cbd6a48f..c6209e7b6c96 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -89,7 +89,7 @@ enum erspan_encap_type {
ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */
ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */
- ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */
+ ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag preserved in frame */
};
#define ERSPAN_V1_MDSIZE 4
@@ -192,7 +192,7 @@ static inline void erspan_build_header(struct sk_buff *skb,
enc_type = ERSPAN_ENCAP_NOVLAN;
/* If mirrored packet has vlan tag, extract tci and
- * perserve vlan header in the mirrored frame.
+ * preserve vlan header in the mirrored frame.
*/
if (eth->h_proto == htons(ETH_P_8021Q)) {
qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
diff --git a/include/net/espintcp.h b/include/net/espintcp.h
index 0335bbd76552..c70efd704b6d 100644
--- a/include/net/espintcp.h
+++ b/include/net/espintcp.h
@@ -32,7 +32,7 @@ struct espintcp_ctx {
static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
/* RCU is only needed for diag */
return (__force void *)icsk->icsk_ulp_data;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 82da359bca03..d17855c52ef9 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -172,8 +172,7 @@ void fib_rules_unregister(struct fib_rules_ops *);
int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
struct fib_lookup_arg *);
-int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
- u32 flags);
+int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table);
bool fib_rule_matchall(const struct fib_rule *rule);
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
struct netlink_ext_ack *extack);
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 1a7131d6cb0e..ced79dc8e856 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -7,6 +7,7 @@
#include <linux/siphash.h>
#include <linux/string.h>
#include <uapi/linux/if_ether.h>
+#include <uapi/linux/pkt_cls.h>
struct bpf_prog;
struct net;
@@ -16,7 +17,8 @@ struct sk_buff;
* struct flow_dissector_key_control:
* @thoff: Transport header offset
* @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_*
- * @flags: Key flags. Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAGENCAPSULATION)
+ * @flags: Key flags.
+ * Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION|F_*)
*/
struct flow_dissector_key_control {
u16 thoff;
@@ -24,9 +26,20 @@ struct flow_dissector_key_control {
u32 flags;
};
-#define FLOW_DIS_IS_FRAGMENT BIT(0)
-#define FLOW_DIS_FIRST_FRAG BIT(1)
-#define FLOW_DIS_ENCAPSULATION BIT(2)
+/* The control flags are kept in sync with TCA_FLOWER_KEY_FLAGS_*, as those
+ * flags are exposed to userspace in some error paths, ie. unsupported flags.
+ */
+enum flow_dissector_ctrl_flags {
+ FLOW_DIS_IS_FRAGMENT = TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT,
+ FLOW_DIS_FIRST_FRAG = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+ FLOW_DIS_F_TUNNEL_CSUM = TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM,
+ FLOW_DIS_F_TUNNEL_DONT_FRAGMENT = TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT,
+ FLOW_DIS_F_TUNNEL_OAM = TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM,
+ FLOW_DIS_F_TUNNEL_CRIT_OPT = TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT,
+
+ /* These flags are internal to the kernel */
+ FLOW_DIS_ENCAPSULATION = (TCA_FLOWER_KEY_FLAGS_MAX << 1),
+};
enum flow_dissect_ret {
FLOW_DISSECT_RET_OUT_GOOD,
@@ -97,7 +110,7 @@ struct flow_dissector_key_enc_opts {
* here but seems difficult to #include
*/
u8 len;
- __be16 dst_opt_type;
+ u32 dst_opt_type;
};
struct flow_dissector_key_keyid {
@@ -433,6 +446,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
}
u32 flow_hash_from_keys(struct flow_keys *keys);
+u32 flow_hash_from_keys_seed(struct flow_keys *keys,
+ const siphash_key_t *keyval);
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp,
const void *data, int thoff, int hlen);
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 9efa9a59e81f..292cd8f4b762 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -333,7 +333,7 @@ struct flow_action_entry {
struct flow_action {
unsigned int num_entries;
- struct flow_action_entry entries[];
+ struct flow_action_entry entries[] __counted_by(num_entries);
};
static inline bool flow_action_has_entries(const struct flow_action *action)
@@ -345,7 +345,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
* flow_offload_has_one_action() - check if exactly one action is present
* @action: tc filter flow offload action
*
- * Returns true if exactly one action is present.
+ * Return: true if exactly one action is present.
*/
static inline bool flow_offload_has_one_action(const struct flow_action *action)
{
@@ -449,6 +449,96 @@ static inline bool flow_rule_match_key(const struct flow_rule *rule,
return dissector_uses_key(rule->match.dissector, key);
}
+/**
+ * flow_rule_is_supp_control_flags() - check for supported control flags
+ * @supp_flags: control flags supported by driver
+ * @ctrl_flags: control flags present in rule
+ * @extack: The netlink extended ACK for reporting errors.
+ *
+ * Return: true if only supported control flags are set, false otherwise.
+ */
+static inline bool flow_rule_is_supp_control_flags(const u32 supp_flags,
+ const u32 ctrl_flags,
+ struct netlink_ext_ack *extack)
+{
+ if (likely((ctrl_flags & ~supp_flags) == 0))
+ return true;
+
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported match on control.flags %#x",
+ ctrl_flags);
+
+ return false;
+}
+
+/**
+ * flow_rule_is_supp_enc_control_flags() - check for supported control flags
+ * @supp_enc_flags: encapsulation control flags supported by driver
+ * @enc_ctrl_flags: encapsulation control flags present in rule
+ * @extack: The netlink extended ACK for reporting errors.
+ *
+ * Return: true if only supported control flags are set, false otherwise.
+ */
+static inline bool flow_rule_is_supp_enc_control_flags(const u32 supp_enc_flags,
+ const u32 enc_ctrl_flags,
+ struct netlink_ext_ack *extack)
+{
+ if (likely((enc_ctrl_flags & ~supp_enc_flags) == 0))
+ return true;
+
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Unsupported match on enc_control.flags %#x",
+ enc_ctrl_flags);
+
+ return false;
+}
+
+/**
+ * flow_rule_has_control_flags() - check for presence of any control flags
+ * @ctrl_flags: control flags present in rule
+ * @extack: The netlink extended ACK for reporting errors.
+ *
+ * Return: true if control flags are set, false otherwise.
+ */
+static inline bool flow_rule_has_control_flags(const u32 ctrl_flags,
+ struct netlink_ext_ack *extack)
+{
+ return !flow_rule_is_supp_control_flags(0, ctrl_flags, extack);
+}
+
+/**
+ * flow_rule_has_enc_control_flags() - check for presence of any control flags
+ * @enc_ctrl_flags: encapsulation control flags present in rule
+ * @extack: The netlink extended ACK for reporting errors.
+ *
+ * Return: true if control flags are set, false otherwise.
+ */
+static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags,
+ struct netlink_ext_ack *extack)
+{
+ return !flow_rule_is_supp_enc_control_flags(0, enc_ctrl_flags, extack);
+}
+
+/**
+ * flow_rule_match_has_control_flags() - match and check for any control flags
+ * @rule: The flow_rule under evaluation.
+ * @extack: The netlink extended ACK for reporting errors.
+ *
+ * Return: true if control flags are set, false otherwise.
+ */
+static inline bool flow_rule_match_has_control_flags(struct flow_rule *rule,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_match_control match;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL))
+ return false;
+
+ flow_rule_match_control(rule, &match);
+
+ return flow_rule_has_control_flags(match.mask->flags, extack);
+}
+
struct flow_stats {
u64 pkts;
u64 bytes;
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index e8c34aa4a640..c1d91f1d20f6 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -2,22 +2,33 @@
#ifndef __NET_GENERIC_NETLINK_H
#define __NET_GENERIC_NETLINK_H
-#include <linux/genetlink.h>
+#include <linux/net.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
+#include <uapi/linux/genetlink.h>
#define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN)
+/* Non-parallel generic netlink requests are serialized by a global lock. */
+void genl_lock(void);
+void genl_unlock(void);
+
+#define MODULE_ALIAS_GENL_FAMILY(family) \
+ MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family)
+
+/* Binding to multicast group requires %CAP_NET_ADMIN */
+#define GENL_MCAST_CAP_NET_ADMIN BIT(0)
+/* Binding to multicast group requires %CAP_SYS_ADMIN */
+#define GENL_MCAST_CAP_SYS_ADMIN BIT(1)
+
/**
* struct genl_multicast_group - generic netlink multicast group
* @name: name of the multicast group, names are per-family
- * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM)
- * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding
+ * @flags: GENL_MCAST_* flags
*/
struct genl_multicast_group {
char name[GENL_NAMSIZ];
u8 flags;
- u8 cap_sys_admin:1;
};
struct genl_split_ops;
@@ -38,6 +49,8 @@ struct genl_info;
* do additional, common, filtering and return an error
* @post_doit: called after an operation's doit callback, it may
* undo operations done by pre_doit, for example release locks
+ * @bind: called when family multicast group is added to a netlink socket
+ * @unbind: called when family multicast group is removed from a netlink socket
* @module: pointer to the owning module (set to THIS_MODULE)
* @mcgrps: multicast groups used by this family
* @n_mcgrps: number of multicast groups
@@ -51,6 +64,9 @@ struct genl_info;
* @split_ops: the split do/dump form of operation definition
* @n_split_ops: number of entries in @split_ops, not that with split do/dump
* ops the number of entries is not the same as number of commands
+ * @sock_priv_size: the size of per-socket private memory
+ * @sock_priv_init: the per-socket private memory initializer
+ * @sock_priv_destroy: the per-socket private memory destructor
*
* Attribute policies (the combination of @policy and @maxattr fields)
* can be attached at the family level or at the operation level.
@@ -78,17 +94,25 @@ struct genl_family {
void (*post_doit)(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
+ int (*bind)(int mcgrp);
+ void (*unbind)(int mcgrp);
const struct genl_ops * ops;
const struct genl_small_ops *small_ops;
const struct genl_split_ops *split_ops;
const struct genl_multicast_group *mcgrps;
struct module *module;
+ size_t sock_priv_size;
+ void (*sock_priv_init)(void *priv);
+ void (*sock_priv_destroy)(void *priv);
+
/* private: internal use only */
/* protocol family identifier */
int id;
/* starting number of multicast group IDs in this family */
unsigned int mcgrp_offset;
+ /* list of per-socket privs */
+ struct xarray *sock_privs;
};
/**
@@ -137,7 +161,7 @@ static inline void *genl_info_userhdr(const struct genl_info *info)
/* Report that a root attribute is missing */
#define GENL_REQ_ATTR_CHECK(info, attr) ({ \
- struct genl_info *__info = (info); \
+ const struct genl_info *__info = (info); \
\
NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \
})
@@ -298,6 +322,8 @@ static inline bool genl_info_is_ntf(const struct genl_info *info)
return !info->nlhdr;
}
+void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk);
+void *genl_sk_priv_get(struct genl_family *family, struct sock *sk);
int genl_register_family(struct genl_family *family);
int genl_unregister_family(const struct genl_family *family);
void genl_notify(const struct genl_family *family, struct sk_buff *skb,
@@ -438,6 +464,35 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
}
/**
+ * genlmsg_multicast_netns_filtered - multicast a netlink message
+ * to a specific netns with filter
+ * function
+ * @family: the generic netlink family
+ * @net: the net namespace
+ * @skb: netlink message as socket buffer
+ * @portid: own netlink portid to avoid sending to yourself
+ * @group: offset of multicast group in groups array
+ * @flags: allocation flags
+ * @filter: filter function
+ * @filter_data: filter function private data
+ *
+ * Return: 0 on success, negative error code for failure.
+ */
+static inline int
+genlmsg_multicast_netns_filtered(const struct genl_family *family,
+ struct net *net, struct sk_buff *skb,
+ u32 portid, unsigned int group, gfp_t flags,
+ netlink_filter_fn filter,
+ void *filter_data)
+{
+ if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ return -EINVAL;
+ group = family->mcgrp_offset + group;
+ return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group,
+ flags, filter, filter_data);
+}
+
+/**
* genlmsg_multicast_netns - multicast a netlink message to a specific netns
* @family: the generic netlink family
* @net: the net namespace
@@ -450,10 +505,8 @@ static inline int genlmsg_multicast_netns(const struct genl_family *family,
struct net *net, struct sk_buff *skb,
u32 portid, unsigned int group, gfp_t flags)
{
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
- return -EINVAL;
- group = family->mcgrp_offset + group;
- return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
+ return genlmsg_multicast_netns_filtered(family, net, skb, portid,
+ group, flags, NULL, NULL);
}
/**
diff --git a/include/net/gre.h b/include/net/gre.h
index 4e209708b754..ccd293203284 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -49,67 +49,61 @@ static inline bool netif_is_ip6gretap(const struct net_device *dev)
!strcmp(dev->rtnl_link_ops->kind, "ip6gretap");
}
-static inline int gre_calc_hlen(__be16 o_flags)
+static inline int gre_calc_hlen(const unsigned long *o_flags)
{
int addend = 4;
- if (o_flags & TUNNEL_CSUM)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, o_flags))
addend += 4;
- if (o_flags & TUNNEL_KEY)
+ if (test_bit(IP_TUNNEL_KEY_BIT, o_flags))
addend += 4;
- if (o_flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, o_flags))
addend += 4;
return addend;
}
-static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
+static inline void gre_flags_to_tnl_flags(unsigned long *dst, __be16 flags)
{
- __be16 tflags = 0;
-
- if (flags & GRE_CSUM)
- tflags |= TUNNEL_CSUM;
- if (flags & GRE_ROUTING)
- tflags |= TUNNEL_ROUTING;
- if (flags & GRE_KEY)
- tflags |= TUNNEL_KEY;
- if (flags & GRE_SEQ)
- tflags |= TUNNEL_SEQ;
- if (flags & GRE_STRICT)
- tflags |= TUNNEL_STRICT;
- if (flags & GRE_REC)
- tflags |= TUNNEL_REC;
- if (flags & GRE_VERSION)
- tflags |= TUNNEL_VERSION;
-
- return tflags;
+ IP_TUNNEL_DECLARE_FLAGS(res) = { };
+
+ __assign_bit(IP_TUNNEL_CSUM_BIT, res, flags & GRE_CSUM);
+ __assign_bit(IP_TUNNEL_ROUTING_BIT, res, flags & GRE_ROUTING);
+ __assign_bit(IP_TUNNEL_KEY_BIT, res, flags & GRE_KEY);
+ __assign_bit(IP_TUNNEL_SEQ_BIT, res, flags & GRE_SEQ);
+ __assign_bit(IP_TUNNEL_STRICT_BIT, res, flags & GRE_STRICT);
+ __assign_bit(IP_TUNNEL_REC_BIT, res, flags & GRE_REC);
+ __assign_bit(IP_TUNNEL_VERSION_BIT, res, flags & GRE_VERSION);
+
+ ip_tunnel_flags_copy(dst, res);
}
-static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags)
+static inline __be16 gre_tnl_flags_to_gre_flags(const unsigned long *tflags)
{
__be16 flags = 0;
- if (tflags & TUNNEL_CSUM)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tflags))
flags |= GRE_CSUM;
- if (tflags & TUNNEL_ROUTING)
+ if (test_bit(IP_TUNNEL_ROUTING_BIT, tflags))
flags |= GRE_ROUTING;
- if (tflags & TUNNEL_KEY)
+ if (test_bit(IP_TUNNEL_KEY_BIT, tflags))
flags |= GRE_KEY;
- if (tflags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tflags))
flags |= GRE_SEQ;
- if (tflags & TUNNEL_STRICT)
+ if (test_bit(IP_TUNNEL_STRICT_BIT, tflags))
flags |= GRE_STRICT;
- if (tflags & TUNNEL_REC)
+ if (test_bit(IP_TUNNEL_REC_BIT, tflags))
flags |= GRE_REC;
- if (tflags & TUNNEL_VERSION)
+ if (test_bit(IP_TUNNEL_VERSION_BIT, tflags))
flags |= GRE_VERSION;
return flags;
}
static inline void gre_build_header(struct sk_buff *skb, int hdr_len,
- __be16 flags, __be16 proto,
+ const unsigned long *flags, __be16 proto,
__be32 key, __be32 seq)
{
+ IP_TUNNEL_DECLARE_FLAGS(cond) = { };
struct gre_base_hdr *greh;
skb_push(skb, hdr_len);
@@ -120,18 +114,22 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len,
greh->flags = gre_tnl_flags_to_gre_flags(flags);
greh->protocol = proto;
- if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
+ __set_bit(IP_TUNNEL_KEY_BIT, cond);
+ __set_bit(IP_TUNNEL_CSUM_BIT, cond);
+ __set_bit(IP_TUNNEL_SEQ_BIT, cond);
+
+ if (ip_tunnel_flags_intersect(flags, cond)) {
__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
- if (flags & TUNNEL_SEQ) {
+ if (test_bit(IP_TUNNEL_SEQ_BIT, flags)) {
*ptr = seq;
ptr--;
}
- if (flags & TUNNEL_KEY) {
+ if (test_bit(IP_TUNNEL_KEY_BIT, flags)) {
*ptr = key;
ptr--;
}
- if (flags & TUNNEL_CSUM &&
+ if (test_bit(IP_TUNNEL_CSUM_BIT, flags) &&
!(skb_shinfo(skb)->gso_type &
(SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
*ptr = 0;
diff --git a/include/net/gro.h b/include/net/gro.h
index 018343254c90..7b548f91754b 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _NET_IPV6_GRO_H
-#define _NET_IPV6_GRO_H
+#ifndef _NET_GRO_H
+#define _NET_GRO_H
#include <linux/indirect_call_wrapper.h>
#include <linux/ip.h>
@@ -9,6 +9,10 @@
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
+#include <net/hotdata.h>
+
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
struct napi_gro_cb {
union {
@@ -35,15 +39,14 @@ struct napi_gro_cb {
/* This is non-zero if the packet cannot be merged with the new skb. */
u16 flush;
- /* Save the IP ID here and check when we get to the transport layer */
- u16 flush_id;
-
/* Number of segments aggregated. */
u16 count;
- /* Used in ipv6_gro_receive() and foo-over-udp */
+ /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
u16 proto;
+ u16 pad;
+
/* Used in napi_gro_cb::free */
#define NAPI_GRO_FREE 1
#define NAPI_GRO_FREE_STOLEN_HEAD 2
@@ -74,8 +77,8 @@ struct napi_gro_cb {
/* Used in GRE, set in fou/gue_gro_receive */
u8 is_fou:1;
- /* Used to determine if flush_id can be ignored */
- u8 is_atomic:1;
+ /* Used to determine if ipid_offset can be ignored */
+ u8 ip_fixedid:1;
/* Number of gro_receive callbacks this packet already went through */
u8 recursion_counter:4;
@@ -148,21 +151,16 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
NAPI_GRO_CB(skb)->data_offset += len;
}
-static inline void *skb_gro_header_fast(struct sk_buff *skb,
+static inline void *skb_gro_header_fast(const struct sk_buff *skb,
unsigned int offset)
{
return NAPI_GRO_CB(skb)->frag0 + offset;
}
-static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
-{
- return NAPI_GRO_CB(skb)->frag0_len < hlen;
-}
-
-static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
+static inline bool skb_gro_may_pull(const struct sk_buff *skb,
+ unsigned int hlen)
{
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
+ return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
}
static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
@@ -171,28 +169,35 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
if (!pskb_may_pull(skb, hlen))
return NULL;
- skb_gro_frag0_invalidate(skb);
return skb->data + offset;
}
-static inline void *skb_gro_header(struct sk_buff *skb,
- unsigned int hlen, unsigned int offset)
+static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
+ unsigned int offset)
{
void *ptr;
ptr = skb_gro_header_fast(skb, offset);
- if (skb_gro_header_hard(skb, hlen))
+ if (!skb_gro_may_pull(skb, hlen))
ptr = skb_gro_header_slow(skb, hlen, offset);
return ptr;
}
-static inline void *skb_gro_network_header(struct sk_buff *skb)
+static inline int skb_gro_receive_network_offset(const struct sk_buff *skb)
{
- return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
- skb_network_offset(skb);
+ return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark];
}
-static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline void *skb_gro_network_header(const struct sk_buff *skb)
+{
+ if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
+ return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb));
+
+ return skb->data + skb_gro_receive_network_offset(skb);
+}
+
+static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct iphdr *iph = skb_gro_network_header(skb);
@@ -430,7 +435,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
return uh;
}
-static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
@@ -438,7 +444,71 @@ static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
skb_gro_len(skb), proto, 0));
}
+static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2,
+ struct sk_buff *p, bool outer)
+{
+ const u32 id = ntohl(*(__be32 *)&iph->id);
+ const u32 id2 = ntohl(*(__be32 *)&iph2->id);
+ const u16 ipid_offset = (id >> 16) - (id2 >> 16);
+ const u16 count = NAPI_GRO_CB(p)->count;
+ const u32 df = id & IP_DF;
+ int flush;
+
+ /* All fields must match except length and checksum. */
+ flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF));
+
+ if (flush | (outer && df))
+ return flush;
+
+ /* When we receive our second frame we can make a decision on if we
+ * continue this flow as an atomic flow with a fixed ID or if we use
+ * an incrementing ID.
+ */
+ if (count == 1 && df && !ipid_offset)
+ NAPI_GRO_CB(p)->ip_fixedid = true;
+
+ return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid);
+}
+
+static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2)
+{
+ /* <Version:4><Traffic_Class:8><Flow_Label:20> */
+ __be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2;
+
+ /* Flush if Traffic Class fields are different. */
+ return !!((first_word & htonl(0x0FF00000)) |
+ (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
+}
+
+static inline int __gro_receive_network_flush(const void *th, const void *th2,
+ struct sk_buff *p, const u16 diff,
+ bool outer)
+{
+ const void *nh = th - diff;
+ const void *nh2 = th2 - diff;
+
+ if (((struct iphdr *)nh)->version == 6)
+ return ipv6_gro_flush(nh, nh2);
+ else
+ return inet_gro_flush(nh, nh2, p, outer);
+}
+
+static inline int gro_receive_network_flush(const void *th, const void *th2,
+ struct sk_buff *p)
+{
+ const bool encap_mark = NAPI_GRO_CB(p)->encap_mark;
+ int off = skb_transport_offset(p);
+ int flush;
+
+ flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark);
+ if (encap_mark)
+ flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false);
+
+ return flush;
+}
+
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
static inline void gro_normal_list(struct napi_struct *napi)
@@ -457,7 +527,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= READ_ONCE(gro_normal_batch))
+ if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
gro_normal_list(napi);
}
@@ -504,6 +574,7 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
#endif
}
-extern struct list_head offload_base;
+struct packet_offload *gro_find_receive_by_type(__be16 type);
+struct packet_offload *gro_find_complete_by_type(__be16 type);
-#endif /* _NET_IPV6_GRO_H */
+#endif /* _NET_GRO_H */
diff --git a/include/net/gtp.h b/include/net/gtp.h
index 2a503f035d18..c0253c8702d0 100644
--- a/include/net/gtp.h
+++ b/include/net/gtp.h
@@ -78,4 +78,9 @@ static inline bool netif_is_gtp(const struct net_device *dev)
#define GTP1_F_EXTHDR 0x04
#define GTP1_F_MASK 0x07
+struct gtp_ext_hdr {
+ __u8 len;
+ __u8 data[];
+};
+
#endif
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
new file mode 100644
index 000000000000..30e9570beb2a
--- /dev/null
+++ b/include/net/hotdata.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_HOTDATA_H
+#define _NET_HOTDATA_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <net/protocol.h>
+
+/* Read mostly data used in network fast paths. */
+struct net_hotdata {
+#if IS_ENABLED(CONFIG_INET)
+ struct packet_offload ip_packet_offload;
+ struct net_offload tcpv4_offload;
+ struct net_protocol tcp_protocol;
+ struct net_offload udpv4_offload;
+ struct net_protocol udp_protocol;
+ struct packet_offload ipv6_packet_offload;
+ struct net_offload tcpv6_offload;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol tcpv6_protocol;
+ struct inet6_protocol udpv6_protocol;
+#endif
+ struct net_offload udpv6_offload;
+#endif
+ struct list_head offload_base;
+ struct list_head ptype_all;
+ struct kmem_cache *skbuff_cache;
+ struct kmem_cache *skbuff_fclone_cache;
+ struct kmem_cache *skb_small_head_cache;
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+ u32 rps_cpu_mask;
+#endif
+ int gro_normal_batch;
+ int netdev_budget;
+ int netdev_budget_usecs;
+ int tstamp_prequeue;
+ int max_backlog;
+ int dev_tx_weight;
+ int dev_rx_weight;
+ int sysctl_max_skb_frags;
+ int sysctl_skb_defer_max;
+ int sysctl_mem_pcpu_rsv;
+};
+
+#define inet_ehash_secret net_hotdata.tcp_protocol.secret
+#define udp_ehash_secret net_hotdata.udp_protocol.secret
+#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
+#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret
+#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
+#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret
+
+extern struct net_hotdata net_hotdata;
+
+#endif /* _NET_HOTDATA_H */
diff --git a/include/net/hwbm.h b/include/net/hwbm.h
index aa495decec35..bdbe91c609ff 100644
--- a/include/net/hwbm.h
+++ b/include/net/hwbm.h
@@ -11,9 +11,9 @@ struct hwbm_pool {
int frag_size;
/* Number of buffers currently used by this pool */
int buf_num;
- /* constructor called during alocation */
+ /* constructor called during allocation */
int (*construct)(struct hwbm_pool *bm_pool, void *buf);
- /* protect acces to the buffer counter*/
+ /* protect access to the buffer counter*/
struct mutex buf_lock;
/* private data */
void *priv;
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index c6cb6f642742..813e163ce27c 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -18,7 +18,7 @@
#define __RADIOTAP_H
#include <linux/kernel.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/**
* struct ieee80211_radiotap_header - base radiotap header
@@ -544,6 +544,12 @@ enum ieee80211_radiotap_eht_usig_common {
IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK = 0x00000080,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_20MHZ = 0,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_40MHZ = 1,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_80MHZ = 2,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_160MHZ = 3,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_1 = 4,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_2 = 5,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR = 0x01f80000,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP = 0xfe000000,
@@ -581,6 +587,7 @@ enum ieee80211_radiotap_eht_usig_tb {
/**
* ieee80211_get_radiotap_len - get radiotap header length
* @data: pointer to the header
+ * Return: the radiotap header length
*/
static inline u16 ieee80211_get_radiotap_len(const char *data)
{
diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 063313df447d..4de858f9929e 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -125,6 +125,35 @@ struct ieee802154_hdr_fc {
#endif
};
+struct ieee802154_assoc_req_pl {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved1:1,
+ device_type:1,
+ power_source:1,
+ rx_on_when_idle:1,
+ assoc_type:1,
+ reserved2:1,
+ security_cap:1,
+ alloc_addr:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 alloc_addr:1,
+ security_cap:1,
+ reserved2:1,
+ assoc_type:1,
+ rx_on_when_idle:1,
+ power_source:1,
+ device_type:1,
+ reserved1:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+} __packed;
+
+struct ieee802154_assoc_resp_pl {
+ __le16 short_addr;
+ u8 status;
+} __packed;
+
enum ieee802154_frame_version {
IEEE802154_2003_STD,
IEEE802154_2006_STD,
@@ -140,6 +169,19 @@ enum ieee802154_addressing_mode {
IEEE802154_EXTENDED_ADDRESSING,
};
+enum ieee802154_association_status {
+ IEEE802154_ASSOCIATION_SUCCESSFUL = 0x00,
+ IEEE802154_PAN_AT_CAPACITY = 0x01,
+ IEEE802154_PAN_ACCESS_DENIED = 0x02,
+ IEEE802154_HOPPING_SEQUENCE_OFFSET_DUP = 0x03,
+ IEEE802154_FAST_ASSOCIATION_SUCCESSFUL = 0x80,
+};
+
+enum ieee802154_disassociation_reason {
+ IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE = 0x1,
+ IEEE802154_DEVICE_WISHES_TO_LEAVE = 0x2,
+};
+
struct ieee802154_hdr {
struct ieee802154_hdr_fc fc;
u8 seq;
@@ -163,6 +205,24 @@ struct ieee802154_beacon_req_frame {
struct ieee802154_mac_cmd_pl mac_pl;
};
+struct ieee802154_association_req_frame {
+ struct ieee802154_hdr mhr;
+ struct ieee802154_mac_cmd_pl mac_pl;
+ struct ieee802154_assoc_req_pl assoc_req_pl;
+};
+
+struct ieee802154_association_resp_frame {
+ struct ieee802154_hdr mhr;
+ struct ieee802154_mac_cmd_pl mac_pl;
+ struct ieee802154_assoc_resp_pl assoc_resp_pl;
+};
+
+struct ieee802154_disassociation_notif_frame {
+ struct ieee802154_hdr mhr;
+ struct ieee802154_mac_cmd_pl mac_pl;
+ u8 disassoc_pl;
+};
+
/* pushes hdr onto the skb. fields of hdr->fc that can be calculated from
* the contents of hdr will be, and the actual value of those bits in
* hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame
diff --git a/include/net/ieee8021q.h b/include/net/ieee8021q.h
new file mode 100644
index 000000000000..8bfe903dd3d0
--- /dev/null
+++ b/include/net/ieee8021q.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> */
+
+#ifndef _NET_IEEE8021Q_H
+#define _NET_IEEE8021Q_H
+
+#include <linux/errno.h>
+
+/**
+ * enum ieee8021q_traffic_type - 802.1Q traffic type priority values (802.1Q-2022)
+ *
+ * @IEEE8021Q_TT_BK: Background
+ * @IEEE8021Q_TT_BE: Best Effort (default). According to 802.1Q-2022, BE is 0
+ * but has higher priority than BK which is 1.
+ * @IEEE8021Q_TT_EE: Excellent Effort
+ * @IEEE8021Q_TT_CA: Critical Applications
+ * @IEEE8021Q_TT_VI: Video, < 100 ms latency and jitter
+ * @IEEE8021Q_TT_VO: Voice, < 10 ms latency and jitter
+ * @IEEE8021Q_TT_IC: Internetwork Control
+ * @IEEE8021Q_TT_NC: Network Control
+ */
+enum ieee8021q_traffic_type {
+ IEEE8021Q_TT_BK = 0,
+ IEEE8021Q_TT_BE = 1,
+ IEEE8021Q_TT_EE = 2,
+ IEEE8021Q_TT_CA = 3,
+ IEEE8021Q_TT_VI = 4,
+ IEEE8021Q_TT_VO = 5,
+ IEEE8021Q_TT_IC = 6,
+ IEEE8021Q_TT_NC = 7,
+
+ /* private: */
+ IEEE8021Q_TT_MAX,
+};
+
+#define SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT(dscp) ((dscp >> 3) & 0x7)
+
+#if IS_ENABLED(CONFIG_NET_IEEE8021Q_HELPERS)
+
+int ietf_dscp_to_ieee8021q_tt(u8 dscp);
+int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues);
+
+#else
+
+static inline int ietf_dscp_to_ieee8021q_tt(u8 dscp)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt,
+ unsigned int num_queues)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif
+#endif /* _NET_IEEE8021Q_H */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 31bf475eca76..238ad3349456 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -85,7 +85,7 @@ struct ip6_sf_socklist {
unsigned int sl_max;
unsigned int sl_count;
struct rcu_head rcu;
- struct in6_addr sl_addr[];
+ struct in6_addr sl_addr[] __counted_by(sl_max);
};
#define IP6_SFBLOCK 10 /* allocate this many at once */
@@ -144,7 +144,7 @@ struct ipv6_ac_socklist {
struct ifacaddr6 {
struct in6_addr aca_addr;
struct fib6_info *aca_rt;
- struct ifacaddr6 *aca_next;
+ struct ifacaddr6 __rcu *aca_next;
struct hlist_node aca_addr_lst;
int aca_users;
refcount_t aca_refcnt;
@@ -196,7 +196,7 @@ struct inet6_dev {
spinlock_t mc_report_lock; /* mld query report lock */
struct mutex mc_lock; /* mld global lock */
- struct ifacaddr6 *ac_list;
+ struct ifacaddr6 __rcu *ac_list;
rwlock_t lock;
refcount_t refcnt;
__u32 if_flags;
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 533a7337865a..74dd90ff5f12 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -40,7 +40,7 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash,
*
* The sockhash lock must be held as a reader here.
*/
-struct sock *__inet6_lookup_established(struct net *net,
+struct sock *__inet6_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const __be16 sport,
@@ -56,7 +56,7 @@ inet6_ehashfn_t inet6_ehashfn;
INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn);
-struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
__be16 sport,
@@ -64,7 +64,7 @@ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
unsigned short hnum,
inet6_ehashfn_t *ehashfn);
-struct sock *inet6_lookup_listener(struct net *net,
+struct sock *inet6_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -73,7 +73,7 @@ struct sock *inet6_lookup_listener(struct net *net,
const unsigned short hnum,
const int dif, const int sdif);
-struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -82,7 +82,7 @@ struct sock *inet6_lookup_run_sk_lookup(struct net *net,
const u16 hnum, const int dif,
inet6_ehashfn_t *ehashfn);
-static inline struct sock *__inet6_lookup(struct net *net,
+static inline struct sock *__inet6_lookup(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -167,7 +167,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
iif, sdif, refcounted);
}
-struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
@@ -175,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
int inet6_hash(struct sock *sk);
-static inline bool inet6_match(struct net *net, const struct sock *sk,
+static inline bool inet6_match(const struct net *net, const struct sock *sk,
const struct in6_addr *saddr,
const struct in6_addr *daddr,
const __portpair ports,
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index f50a644d87a9..c17a6585d0b0 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -29,8 +29,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags, int is_sendmsg);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);
-int inet_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern);
+int inet_accept(struct socket *sock, struct socket *newsock,
+ struct proto_accept_arg *arg);
void __inet_accept(struct socket *sock, struct socket *newsock,
struct sock *newsk);
int inet_send_prepare(struct sock *sk);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index fee1e5650551..4bd93571e6c1 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -44,7 +44,6 @@ struct inet_connection_sock_af_ops {
struct request_sock *req_unhash,
bool *own_req);
u16 net_header_len;
- u16 net_frag_header_len;
u16 sockaddr_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
@@ -114,7 +113,10 @@ struct inet_connection_sock {
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
__u8 retry; /* Number of attempts */
- __u32 ato; /* Predicted tick of soft clock */
+ #define ATO_BITS 8
+ __u32 ato:ATO_BITS, /* Predicted tick of soft clock */
+ lrcv_flowlabel:20, /* last received ipv6 flowlabel */
+ unused:4;
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
@@ -145,10 +147,7 @@ struct inet_connection_sock {
#define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */
#define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */
-static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
-{
- return (struct inet_connection_sock *)sk;
-}
+#define inet_csk(ptr) container_of_const(ptr, struct inet_connection_sock, icsk_inet.sk)
static inline void *inet_csk_ca(const struct sock *sk)
{
@@ -251,7 +250,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
return (unsigned long)min_t(u64, when, max_when);
}
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
+struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg);
int inet_csk_get_port(struct sock *sk, unsigned short snum);
@@ -282,7 +281,7 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
- return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog;
+ return inet_csk_reqsk_queue_len(sk) > READ_ONCE(sk->sk_max_ack_backlog);
}
bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
@@ -326,11 +325,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
-#define TCP_PINGPONG_THRESH 1
-
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
- inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
+ inet_csk(sk)->icsk_ack.pingpong =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
}
static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
@@ -340,7 +338,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
{
- return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+ return inet_csk(sk)->icsk_ack.pingpong >=
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
+}
+
+static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ack.pingpong < U8_MAX)
+ icsk->icsk_ack.pingpong++;
}
static inline bool inet_csk_has_ulp(const struct sock *sk)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 153960663ce4..5af6eb14c5db 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -76,7 +76,7 @@ struct frag_v6_compare_key {
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
- * @mono_delivery_time: stamp has a mono delivery time (EDT)
+ * @tstamp_type: stamp has a mono delivery time (EDT)
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @fqdir: pointer to struct fqdir
@@ -97,7 +97,7 @@ struct inet_frag_queue {
ktime_t stamp;
int len;
int meat;
- u8 mono_delivery_time;
+ u8 tstamp_type;
__u8 flags;
u16 max_size;
struct fqdir *fqdir;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 3ecfeadbfa06..5eea47f135a4 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -88,7 +88,7 @@ struct inet_bind_bucket {
unsigned short fast_sk_family;
bool fast_ipv6_only;
struct hlist_node node;
- struct hlist_head owners;
+ struct hlist_head bhash2;
};
struct inet_bind2_bucket {
@@ -96,22 +96,17 @@ struct inet_bind2_bucket {
int l3mdev;
unsigned short port;
#if IS_ENABLED(CONFIG_IPV6)
- unsigned short family;
-#endif
- union {
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr v6_rcv_saddr;
+ unsigned short addr_type;
+ struct in6_addr v6_rcv_saddr;
+#define rcv_saddr v6_rcv_saddr.s6_addr32[3]
+#else
+ __be32 rcv_saddr;
#endif
- __be32 rcv_saddr;
- };
/* Node in the bhash2 inet_bind_hashbucket chain */
struct hlist_node node;
+ struct hlist_node bhash_node;
/* List of sockets hashed to this bucket */
struct hlist_head owners;
- /* bhash has twsk in owners, but bhash2 has twsk in
- * deathrow not to add a member in struct sock_common.
- */
- struct hlist_head deathrow;
};
static inline struct net *ib_net(const struct inet_bind_bucket *ib)
@@ -241,7 +236,7 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
struct inet_bind2_bucket *
inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
struct inet_bind_hashbucket *head,
- unsigned short port, int l3mdev,
+ struct inet_bind_bucket *tb,
const struct sock *sk);
void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
@@ -309,7 +304,7 @@ int __inet_hash(struct sock *sk, struct sock *osk);
int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
-struct sock *__inet_lookup_listener(struct net *net,
+struct sock *__inet_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
@@ -356,7 +351,7 @@ static inline struct sock *inet_lookup_listener(struct net *net,
((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
-static inline bool inet_match(struct net *net, const struct sock *sk,
+static inline bool inet_match(const struct net *net, const struct sock *sk,
const __addrpair cookie, const __portpair ports,
int dif, int sdif)
{
@@ -373,7 +368,7 @@ static inline bool inet_match(struct net *net, const struct sock *sk,
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
*/
-struct sock *__inet_lookup_established(struct net *net,
+struct sock *__inet_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
@@ -387,13 +382,13 @@ inet_ehashfn_t inet_ehashfn;
INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn);
-struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
inet_ehashfn_t *ehashfn);
-struct sock *inet_lookup_run_sk_lookup(struct net *net,
+struct sock *inet_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 2790ba58ffe5..394c3b66065e 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -150,7 +150,8 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
return bound_dev_if == dif || bound_dev_if == sdif;
}
-static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+static inline bool inet_sk_bound_dev_eq(const struct net *net,
+ int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
@@ -234,17 +235,13 @@ struct inet_sock {
int uc_index;
int mc_index;
__be32 mc_addr;
- struct {
- __u16 lo;
- __u16 hi;
- } local_port_range;
+ u32 local_port_range; /* high << 16 | low */
struct ip_mc_socklist __rcu *mc_list;
struct inet_cork_full cork;
};
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
-#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */
enum {
INET_FLAGS_PKTINFO = 0,
@@ -268,6 +265,17 @@ enum {
INET_FLAGS_NODEFRAG = 17,
INET_FLAGS_BIND_ADDRESS_NO_PORT = 18,
INET_FLAGS_DEFER_CONNECT = 19,
+ INET_FLAGS_MC6_LOOP = 20,
+ INET_FLAGS_RECVERR6_RFC4884 = 21,
+ INET_FLAGS_MC6_ALL = 22,
+ INET_FLAGS_AUTOFLOWLABEL_SET = 23,
+ INET_FLAGS_AUTOFLOWLABEL = 24,
+ INET_FLAGS_DONTFRAG = 25,
+ INET_FLAGS_RECVERR6 = 26,
+ INET_FLAGS_REPFLOW = 27,
+ INET_FLAGS_RTALERT_ISOLATE = 28,
+ INET_FLAGS_SNDFLOW = 29,
+ INET_FLAGS_RTALERT = 30,
};
/* cmsg flags for inet */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 9365e5af8d6d..beb533a0e880 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -58,7 +58,7 @@ struct inet_timewait_sock {
#define tw_dr __tw_common.skc_tw_dr
__u32 tw_mark;
- volatile unsigned char tw_substate;
+ unsigned char tw_substate;
unsigned char tw_rcv_wscale;
/* Socket demultiplex comparisons on incoming packets. */
@@ -67,20 +67,17 @@ struct inet_timewait_sock {
/* And these are ours. */
unsigned int tw_transparent : 1,
tw_flowlabel : 20,
- tw_pad : 3, /* 3 bits hole */
+ tw_usec_ts : 1,
+ tw_pad : 2, /* 2 bits hole */
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
struct inet_bind2_bucket *tw_tb2;
- struct hlist_node tw_bind2_node;
};
#define tw_tclass tw_tos
-#define twsk_for_each_bound_bhash2(__tw, list) \
- hlist_for_each_entry(__tw, list, tw_bind2_node)
-
static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
{
return (struct inet_timewait_sock *)sk;
@@ -96,17 +93,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
struct inet_timewait_death_row *dr,
const int state);
-void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
- struct inet_hashinfo *hashinfo);
+void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
+ struct sock *sk,
+ struct inet_hashinfo *hashinfo,
+ int timeo);
void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
bool rearm);
-static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
-{
- __inet_twsk_schedule(tw, timeo, false);
-}
-
static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
{
__inet_twsk_schedule(tw, timeo, true);
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 74ff688568a0..f475757daafb 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -96,30 +96,28 @@ static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr)
/* can be called with or without local BH being disabled */
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
- const struct inetpeer_addr *daddr,
- int create);
+ const struct inetpeer_addr *daddr);
static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
__be32 v4daddr,
- int vif, int create)
+ int vif)
{
struct inetpeer_addr daddr;
daddr.a4.addr = v4daddr;
daddr.a4.vif = vif;
daddr.family = AF_INET;
- return inet_getpeer(base, &daddr, create);
+ return inet_getpeer(base, &daddr);
}
static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
- const struct in6_addr *v6daddr,
- int create)
+ const struct in6_addr *v6daddr)
{
struct inetpeer_addr daddr;
daddr.a6 = *v6daddr;
daddr.family = AF_INET6;
- return inet_getpeer(base, &daddr, create);
+ return inet_getpeer(base, &daddr);
}
static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a,
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
index 781d2d8b2f29..2cbbee6e806a 100644
--- a/include/net/ioam6.h
+++ b/include/net/ioam6.h
@@ -12,6 +12,7 @@
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
#include <linux/rhashtable-types.h>
struct ioam6_namespace {
@@ -65,4 +66,7 @@ void ioam6_exit(void);
int ioam6_iptunnel_init(void);
void ioam6_iptunnel_exit(void);
+void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp,
+ void *opt, unsigned int opt_len);
+
#endif /* _NET_IOAM6_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 7db5912e0c5f..bd201278c55a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -33,6 +33,7 @@
#include <net/flow_dissector.h>
#include <net/netns/hash.h>
#include <net/lwtunnel.h>
+#include <net/inet_dscp.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
#define IPV4_MIN_MTU 68 /* RFC 791 */
@@ -258,7 +259,9 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
{
- return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
+ u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos);
+
+ return dsfield & INET_DSCP_MASK;
}
/* datagram.c */
@@ -349,8 +352,14 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
} \
}
-void inet_get_local_port_range(const struct net *net, int *low, int *high);
-void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
+static inline void inet_get_local_port_range(const struct net *net, int *low, int *high)
+{
+ u32 range = READ_ONCE(net->ipv4.ip_local_ports.range);
+
+ *low = range & 0xffff;
+ *high = range >> 16;
+}
+bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
#ifdef CONFIG_SYSCTL
static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
@@ -415,9 +424,14 @@ int ip_decrease_ttl(struct iphdr *iph)
return --iph->ttl;
}
+static inline dscp_t ip4h_dscp(const struct iphdr *ip4h)
+{
+ return inet_dsfield_to_dscp(ip4h->tos);
+}
+
static inline int ip_mtu_locked(const struct dst_entry *dst)
{
- const struct rtable *rt = (const struct rtable *)dst;
+ const struct rtable *rt = dst_rtable(dst);
return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
}
@@ -434,28 +448,34 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
{
- return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE &&
- inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc);
+
+ return pmtudisc != IP_PMTUDISC_INTERFACE &&
+ pmtudisc != IP_PMTUDISC_OMIT;
}
static inline bool ip_sk_use_pmtu(const struct sock *sk)
{
- return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
+ return READ_ONCE(inet_sk(sk)->pmtudisc) < IP_PMTUDISC_PROBE;
}
static inline bool ip_sk_ignore_df(const struct sock *sk)
{
- return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
- inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc);
+
+ return pmtudisc < IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_OMIT;
}
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding)
{
- const struct rtable *rt = container_of(dst, struct rtable, dst);
- struct net *net = dev_net(dst->dev);
- unsigned int mtu;
+ const struct rtable *rt = dst_rtable(dst);
+ unsigned int mtu, res;
+ struct net *net;
+ rcu_read_lock();
+
+ net = dev_net_rcu(dst->dev);
if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
@@ -479,7 +499,11 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
out:
mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ res = mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+
+ rcu_read_unlock();
+
+ return res;
}
static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
@@ -785,11 +809,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0);
}
-bool icmp_global_allow(void);
-void icmp_global_consume(void);
-
-extern int sysctl_icmp_msgs_per_sec;
-extern int sysctl_icmp_msgs_burst;
+bool icmp_global_allow(struct net *net);
+void icmp_global_consume(struct net *net);
#ifdef CONFIG_PROC_FS
int ip_misc_proc_init(void);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1121d614942c..6cb867ce4878 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -30,12 +30,6 @@
#define RT6_DEBUG 2
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
struct rt6_info;
struct fib6_info;
@@ -179,6 +173,9 @@ struct fib6_info {
refcount_t fib6_ref;
unsigned long expires;
+
+ struct hlist_node gc_link;
+
struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
@@ -249,12 +246,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
return rt->fib6_src.plen > 0;
}
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
static inline void fib6_clean_expires(struct fib6_info *f6i)
{
f6i->fib6_flags &= ~RTF_EXPIRES;
f6i->expires = 0;
}
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
static inline void fib6_set_expires(struct fib6_info *f6i,
unsigned long expires)
{
@@ -335,8 +338,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
static inline void fib6_info_release(struct fib6_info *f6i)
{
- if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
- call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
+ if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) {
+ DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link));
+ call_rcu_hurry(&f6i->rcu, fib6_info_destroy_rcu);
+ }
}
enum fib6_walk_state {
@@ -390,6 +395,7 @@ struct fib6_table {
struct inet_peer_base tb6_peers;
unsigned int flags;
unsigned int fib_seq;
+ struct hlist_head tb6_gc_hlist; /* GC candidates */
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
@@ -506,6 +512,38 @@ void fib6_gc_cleanup(void);
int fib6_init(void);
+/* Add the route to the gc list if it is not already there
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
+ */
+static inline void fib6_add_gc_list(struct fib6_info *f6i)
+{
+ /* If fib6_node is null, the f6i is not in (or removed from) the
+ * table.
+ *
+ * There is a gap between finding the f6i from the table and
+ * calling this function without the protection of the tb6_lock.
+ * This check makes sure the f6i is not added to the gc list when
+ * it is not on the table.
+ */
+ if (!rcu_dereference_protected(f6i->fib6_node,
+ lockdep_is_held(&f6i->fib6_table->tb6_lock)))
+ return;
+
+ if (hlist_unhashed(&f6i->gc_link))
+ hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist);
+}
+
+/* Remove the route from the gc list if it is on the list.
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
+ */
+static inline void fib6_remove_gc_list(struct fib6_info *f6i)
+{
+ if (!hlist_unhashed(&f6i->gc_link))
+ hlist_del_init(&f6i->gc_link);
+}
+
struct ipv6_route_iter {
struct seq_net_private p;
struct fib6_walker w;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 392232fcd703..6dbdf60b342f 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -53,13 +53,12 @@ struct route_info {
*/
static inline int rt6_srcprefs2flags(unsigned int srcprefs)
{
- /* No need to bitmask because srcprefs have only 3 bits. */
- return srcprefs << 3;
+ return (srcprefs & IPV6_PREFER_SRC_MASK) << 3;
}
static inline unsigned int rt6_flags2srcprefs(int flags)
{
- return (flags >> 3) & 7;
+ return (flags >> 3) & IPV6_PREFER_SRC_MASK;
}
static inline bool rt6_need_strict(const struct in6_addr *daddr)
@@ -179,7 +178,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev, unsigned int pref,
- u32 defrtr_usr_metric);
+ u32 defrtr_usr_metric,
+ int lifetime);
void rt6_purge_dflt_routers(struct net *net);
@@ -273,7 +273,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
const struct dst_entry *dst = skb_dst(skb);
unsigned int mtu;
- if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+ if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
mtu = READ_ONCE(dst->dev->mtu);
mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
} else {
@@ -284,14 +284,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
{
- return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
- inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+ return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
+ pmtudisc != IPV6_PMTUDISC_OMIT;
}
static inline bool ip6_sk_ignore_df(const struct sock *sk)
{
- return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
- inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+ return pmtudisc < IPV6_PMTUDISC_DO ||
+ pmtudisc == IPV6_PMTUDISC_OMIT;
}
static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
@@ -335,7 +339,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
if (idev)
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
out:
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 74b369bddf49..399592405c72 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -30,8 +30,8 @@ struct __ip6_tnl_parm {
struct in6_addr laddr; /* local tunnel end-point address */
struct in6_addr raddr; /* remote tunnel end-point address */
- __be16 i_flags;
- __be16 o_flags;
+ IP_TUNNEL_DECLARE_FLAGS(i_flags);
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
__be32 i_key;
__be32 o_key;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index ca1700c2a573..967e4dc555fa 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -22,6 +22,8 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/refcount.h>
+#include <linux/ip.h>
+#include <linux/in_route.h>
struct fib_config {
u8 fc_dst_len;
@@ -157,7 +159,7 @@ struct fib_info {
bool pfsrc_removed;
struct nexthop *nh;
struct rcu_head rcu;
- struct fib_nh fib_nh[];
+ struct fib_nh fib_nh[] __counted_by(fib_nhs);
};
@@ -265,6 +267,7 @@ struct fib_dump_filter {
bool filter_set;
bool dump_routes;
bool dump_exceptions;
+ bool rtnl_held;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;
@@ -433,6 +436,11 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
#endif /* CONFIG_IP_MULTIPLE_TABLES */
+static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4)
+{
+ return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos));
+}
+
/* Exported by fib_frontend.c */
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
@@ -520,7 +528,35 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys);
+
+static void
+fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed)
+{
+ u64 mp_seed_64 = mp_seed;
+
+ key->key[0] = (mp_seed_64 << 32) | mp_seed_64;
+ key->key[1] = key->key[0];
+}
+
+static inline u32 fib_multipath_hash_from_keys(const struct net *net,
+ struct flow_keys *keys)
+{
+ siphash_aligned_key_t hash_key;
+ u32 mp_seed;
+
+ mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
+ fib_multipath_hash_construct_key(&hash_key, mp_seed);
+
+ return flow_hash_from_keys_seed(keys, &hash_key);
+}
+#else
+static inline u32 fib_multipath_hash_from_keys(const struct net *net,
+ struct flow_keys *keys)
+{
+ return flow_hash_from_keys(keys);
+}
#endif
+
int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
struct netlink_ext_ack *extack);
void fib_select_multipath(struct fib_result *res, int hash);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 006a61ddd36f..6a070478254d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -36,6 +36,24 @@
(sizeof_field(struct ip_tunnel_key, u) - \
sizeof_field(struct ip_tunnel_key, u.ipv4))
+#define __ipt_flag_op(op, ...) \
+ op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM)
+
+#define IP_TUNNEL_DECLARE_FLAGS(...) \
+ __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__)
+
+#define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__)
+#define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__)
+#define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__)
+#define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__)
+
+#define ip_tunnel_flags_empty(...) \
+ __ipt_flag_op(bitmap_empty, __VA_ARGS__)
+#define ip_tunnel_flags_intersect(...) \
+ __ipt_flag_op(bitmap_intersects, __VA_ARGS__)
+#define ip_tunnel_flags_subset(...) \
+ __ipt_flag_op(bitmap_subset, __VA_ARGS__)
+
struct ip_tunnel_key {
__be64 tun_id;
union {
@@ -48,11 +66,11 @@ struct ip_tunnel_key {
struct in6_addr dst;
} ipv6;
} u;
- __be16 tun_flags;
- u8 tos; /* TOS for IPv4, TC for IPv6 */
- u8 ttl; /* TTL for IPv4, HL for IPv6 */
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags);
__be32 label; /* Flow Label for IPv6 */
u32 nhid;
+ u8 tos; /* TOS for IPv4, TC for IPv6 */
+ u8 ttl; /* TTL for IPv4, HL for IPv6 */
__be16 tp_src;
__be16 tp_dst;
__u8 flow_flags;
@@ -110,6 +128,17 @@ struct ip_tunnel_prl_entry {
struct metadata_dst;
+/* Kernel-side variant of ip_tunnel_parm */
+struct ip_tunnel_parm_kern {
+ char name[IFNAMSIZ];
+ IP_TUNNEL_DECLARE_FLAGS(i_flags);
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+ __be32 i_key;
+ __be32 o_key;
+ int link;
+ struct iphdr iph;
+};
+
struct ip_tunnel {
struct ip_tunnel __rcu *next;
struct hlist_node hash_node;
@@ -136,7 +165,7 @@ struct ip_tunnel {
struct dst_cache dst_cache;
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
int mlink;
int encap_hlen; /* Encap header length (FOU,GUE) */
@@ -157,7 +186,7 @@ struct ip_tunnel {
};
struct tnl_ptk_info {
- __be16 flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 proto;
__be32 key;
__be32 seq;
@@ -179,11 +208,80 @@ struct ip_tunnel_net {
int type;
};
+static inline void ip_tunnel_set_options_present(unsigned long *flags)
+{
+ IP_TUNNEL_DECLARE_FLAGS(present) = { };
+
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_GTP_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present);
+
+ ip_tunnel_flags_or(flags, flags, present);
+}
+
+static inline void ip_tunnel_clear_options_present(unsigned long *flags)
+{
+ IP_TUNNEL_DECLARE_FLAGS(present) = { };
+
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_GTP_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present);
+
+ __ipt_flag_op(bitmap_andnot, flags, flags, present);
+}
+
+static inline bool ip_tunnel_is_options_present(const unsigned long *flags)
+{
+ IP_TUNNEL_DECLARE_FLAGS(present) = { };
+
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_GTP_OPT_BIT, present);
+ __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present);
+
+ return ip_tunnel_flags_intersect(flags, present);
+}
+
+static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags)
+{
+ IP_TUNNEL_DECLARE_FLAGS(supp) = { };
+
+ bitmap_set(supp, 0, BITS_PER_TYPE(__be16));
+ __set_bit(IP_TUNNEL_VTI_BIT, supp);
+
+ return ip_tunnel_flags_subset(flags, supp);
+}
+
+static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags)
+{
+ ip_tunnel_flags_zero(dst);
+
+ bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16));
+ __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI);
+}
+
+static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags)
+{
+ __be16 ret;
+
+ ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16)));
+ if (test_bit(IP_TUNNEL_VTI_BIT, flags))
+ ret |= VTI_ISVTI;
+
+ return ret;
+}
+
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
__be32 saddr, __be32 daddr,
u8 tos, u8 ttl, __be32 label,
__be16 tp_src, __be16 tp_dst,
- __be64 tun_id, __be16 tun_flags)
+ __be64 tun_id,
+ const unsigned long *tun_flags)
{
key->tun_id = tun_id;
key->u.ipv4.src = saddr;
@@ -193,7 +291,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
key->tos = tos;
key->ttl = ttl;
key->label = label;
- key->tun_flags = tun_flags;
+ ip_tunnel_flags_copy(key->tun_flags, tun_flags);
/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
* the upper tunnel are used.
@@ -214,12 +312,8 @@ ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
{
if (skb->mark)
return false;
- if (!info)
- return true;
- if (info->key.tun_flags & TUNNEL_NOCACHE)
- return false;
- return true;
+ return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags);
}
static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
@@ -284,20 +378,25 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
- struct rtnl_link_ops *ops);
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const u8 proto, int tunnel_hlen);
-int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd);
+bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
+ const void __user *data);
+bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp);
int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
void __user *data, int cmd);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
- int link, __be16 flags,
+ int link, const unsigned long *flags,
__be32 remote, __be32 local,
__be32 key);
@@ -306,16 +405,16 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark);
+ struct ip_tunnel_parm_kern *p, __u32 fwmark);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark);
+ struct ip_tunnel_parm_kern *p, __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *encap);
void ip_tunnel_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms);
+ struct ip_tunnel_parm_kern *parms);
extern const struct header_ops ip_tunnel_header_ops;
__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
@@ -450,6 +549,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
return 0;
}
+static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ __be16 payload_protocol = skb_protocol(skb, true);
+
+ if (payload_protocol == htons(ETH_P_IPV6))
+ return ip6_flowlabel((const struct ipv6hdr *)iph);
+ else
+ return 0;
+}
+
static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
const struct sk_buff *skb)
{
@@ -463,7 +573,7 @@ static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
return 0;
}
-/* Propogate ECN bits out */
+/* Propagate ECN bits out */
static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
const struct sk_buff *skb)
{
@@ -536,12 +646,13 @@ static inline void ip_tunnel_info_opts_get(void *to,
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
const void *from, int len,
- __be16 flags)
+ const unsigned long *flags)
{
info->options_len = len;
if (len > 0) {
memcpy(ip_tunnel_info_opts(info), from, len);
- info->key.tun_flags |= flags;
+ ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags,
+ flags);
}
}
@@ -585,7 +696,7 @@ static inline void ip_tunnel_info_opts_get(void *to,
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
const void *from, int len,
- __be16 flags)
+ const unsigned long *flags)
{
info->options_len = 0;
}
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c6932d1a3fa8..6d52b5584d2f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -363,22 +363,13 @@ struct ipcm6_cookie {
struct ipv6_txoptions *opt;
};
-static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
-{
- *ipc6 = (struct ipcm6_cookie) {
- .hlimit = -1,
- .tclass = -1,
- .dontfrag = -1,
- };
-}
-
static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
- const struct ipv6_pinfo *np)
+ const struct sock *sk)
{
*ipc6 = (struct ipcm6_cookie) {
.hlimit = -1,
- .tclass = np->tclass,
- .dontfrag = np->dontfrag,
+ .tclass = inet6_sk(sk)->tclass,
+ .dontfrag = inet6_test_bit(DONTFRAG, sk),
};
}
@@ -428,7 +419,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
@@ -534,13 +525,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb)
return 0;
}
-static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+static inline bool ipv6_accept_ra(const struct inet6_dev *idev)
{
+ s32 accept_ra = READ_ONCE(idev->cnf.accept_ra);
+
/* If forwarding is enabled, RA are not accepted unless the special
* hybrid mode (accept_ra=2) is enabled.
*/
- return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 :
- idev->cnf.accept_ra;
+ return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 :
+ accept_ra;
}
#define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
@@ -784,11 +777,6 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
cpu_to_be32(0x0000ffff))) == 0UL;
}
-static inline bool ipv6_addr_v4mapped_any(const struct in6_addr *a)
-{
- return ipv6_addr_v4mapped(a) && ipv4_is_zeronet(a->s6_addr32[3]);
-}
-
static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a)
{
return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]);
@@ -854,7 +842,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
* we should *never* get to this point since that
* would mean the addrs are equal
*
- * However, we do get to it 8) And exacly, when
+ * However, we do get to it 8) And exactly, when
* addresses are equal 8)
*
* ip route add 1111::/128 via ...
@@ -914,9 +902,9 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
int hlimit;
if (ipv6_addr_is_multicast(&fl6->daddr))
- hlimit = np->mcast_hops;
+ hlimit = READ_ONCE(np->mcast_hops);
else
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
return hlimit;
@@ -976,7 +964,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
hash = skb_get_hash_flowi6(skb, fl6);
/* Since this is being sent on the wire obfuscate hash a bit
- * to minimize possbility that any useful information to an
+ * to minimize possibility that any useful information to an
* attacker is leaked. Only lower 20 bits are relevant.
*/
hash = rol32(hash, 16);
@@ -1133,12 +1121,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
bool connected);
-struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
- struct net_device *dev,
- struct net *net, struct socket *sock,
- struct in6_addr *saddr,
- const struct ip_tunnel_info *info,
- u8 protocol, bool use_cache);
struct dst_entry *ip6_blackhole_route(struct net *net,
struct dst_entry *orig_dst);
@@ -1303,15 +1285,16 @@ static inline int ip6_sock_set_v6only(struct sock *sk)
static inline void ip6_sock_set_recverr(struct sock *sk)
{
- lock_sock(sk);
- inet6_sk(sk)->recverr = true;
- release_sock(sk);
+ inet6_set_bit(RECVERR6, sk);
}
-static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \
+ IPV6_PREFER_SRC_COA)
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
{
+ unsigned int prefmask = ~IPV6_PREFER_SRC_MASK;
unsigned int pref = 0;
- unsigned int prefmask = ~0;
/* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
switch (val & (IPV6_PREFER_SRC_PUBLIC |
@@ -1361,25 +1344,28 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
return -EINVAL;
}
- inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+ WRITE_ONCE(inet6_sk(sk)->srcprefs,
+ (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref);
return 0;
}
-static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
+static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
{
- int ret;
-
lock_sock(sk);
- ret = __ip6_sock_set_addr_preferences(sk, val);
+ inet6_sk(sk)->rxopt.bits.rxinfo = true;
release_sock(sk);
- return ret;
}
-static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
+#define IPV6_ADDR_WORDS 4
+
+static inline void ipv6_addr_cpu_to_be32(__be32 *dst, const u32 *src)
{
- lock_sock(sk);
- inet6_sk(sk)->rxopt.bits.rxinfo = true;
- release_sock(sk);
+ cpu_to_be32_array(dst, src, IPV6_ADDR_WORDS);
+}
+
+static inline void ipv6_addr_be32_to_cpu(u32 *dst, const __be32 *src)
+{
+ be32_to_cpu_array(dst, src, IPV6_ADDR_WORDS);
}
#endif /* _NET_IPV6_H */
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 21da31e1dff5..8a3465c8c2c5 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -9,6 +9,7 @@
#include <net/flow.h>
#include <net/neighbour.h>
#include <net/sock.h>
+#include <net/ipv6.h>
/* structs from net/ip6_fib.h */
struct fib6_info;
@@ -60,6 +61,9 @@ struct ipv6_stub {
#if IS_ENABLED(CONFIG_XFRM)
void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+ struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb);
int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
#endif
@@ -69,6 +73,8 @@ struct ipv6_stub {
int (*output)(struct net *, struct sock *, struct sk_buff *));
struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr,
struct net_device *dev);
+ int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+ __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
@@ -76,7 +82,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly;
struct ipv6_bpf_stub {
int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
u32 flags);
- struct sock *(*udp6_lib_lookup)(struct net *net,
+ struct sock *(*udp6_lib_lookup)(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *tbl,
diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
index f9e88401d7da..dd9e93c12260 100644
--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -15,7 +15,7 @@
* To explore any of the IUCV functions, one must first register their
* program using iucv_register(). Once your program has successfully
* completed a register, it can exploit the other functions.
- * For furthur reference on all IUCV functionality, refer to the
+ * For further reference on all IUCV functionality, refer to the
* CP Programming Services book, also available on the web thru
* www.vm.ibm.com/pubs, manual # SC24-6084
*
@@ -30,6 +30,7 @@
#include <linux/types.h>
#include <linux/slab.h>
+#include <asm/dma-types.h>
#include <asm/debug.h>
/*
@@ -76,12 +77,17 @@
* and iucv_message_reply if IUCV_IPBUFLST or IUCV_IPANSLST are used.
*/
struct iucv_array {
- u32 address;
+ dma32_t address;
u32 length;
} __attribute__ ((aligned (8)));
-extern struct bus_type iucv_bus;
-extern struct device *iucv_root;
+extern const struct bus_type iucv_bus;
+
+struct device_driver;
+
+struct device *iucv_alloc_device(const struct attribute_group **attrs,
+ struct device_driver *driver, void *priv,
+ const char *fmt, ...) __printf(4, 5);
/*
* struct iucv_path
@@ -489,7 +495,7 @@ struct iucv_interface {
int (*path_sever)(struct iucv_path *path, u8 userdata[16]);
int (*iucv_register)(struct iucv_handler *handler, int smp);
void (*iucv_unregister)(struct iucv_handler *handler, int smp);
- struct bus_type *bus;
+ const struct bus_type *bus;
struct device *root;
};
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index b2cf243ebe44..7af1082ea9a0 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -23,7 +23,7 @@
* to handle wireless statistics.
*
* The initial APIs served us well and has proven a reasonably good design.
- * However, there is a few shortcommings :
+ * However, there are a few shortcomings :
* o No events, everything is a request to the driver.
* o Large ioctl function in driver with gigantic switch statement
* (i.e. spaghetti code).
@@ -38,13 +38,13 @@
* -------------------------------
* The new driver API is just a bunch of standard functions (handlers),
* each handling a specific Wireless Extension. The driver just export
- * the list of handler it supports, and those will be called apropriately.
+ * the list of handler it supports, and those will be called appropriately.
*
* I tried to keep the main advantage of the previous API (simplicity,
* efficiency and light weight), and also I provide a good dose of backward
* compatibility (most structures are the same, driver can use both API
* simultaneously, ...).
- * Hopefully, I've also addressed the shortcomming of the initial API.
+ * Hopefully, I've also addressed the shortcoming of the initial API.
*
* The advantage of the new API are :
* o Handling of Extensions in driver broken in small contained functions
@@ -84,7 +84,7 @@
/* ---------------------- THE IMPLEMENTATION ---------------------- */
/*
- * Some of the choice I've made are pretty controversials. Defining an
+ * Some of the choice I've made are pretty controversial. Defining an
* API is very much weighting compromises. This goes into some of the
* details and the thinking behind the implementation.
*
@@ -140,7 +140,7 @@
* example to distinguish setting max rate and basic rate), I would
* break the prototype. Using iwreq_data is more flexible.
* 3) Also, the above form is not generic (see above).
- * 4) I don't expect driver developper using the wrong field of the
+ * 4) I don't expect driver developer using the wrong field of the
* union (Doh !), so static typechecking doesn't add much value.
* 5) Lastly, you can skip the union by doing :
* static int mydriver_ioctl_setrate(struct net_device *dev,
@@ -459,7 +459,7 @@ int iw_handler_get_thrspy(struct net_device *dev, struct iw_request_info *info,
void wireless_spy_update(struct net_device *dev, unsigned char *address,
struct iw_quality *wstats);
-/************************* INLINE FUNTIONS *************************/
+/************************* INLINE FUNCTIONS *************************/
/*
* Function that are so simple that it's more efficient inlining them
*/
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 031c661aa14d..bdfa9d414360 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -198,10 +198,12 @@ struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto)
if (netif_is_l3_slave(dev)) {
struct net_device *master;
+ rcu_read_lock();
master = netdev_master_upper_dev_get_rcu(dev);
if (master && master->l3mdev_ops->l3mdev_l3_out)
skb = master->l3mdev_ops->l3mdev_l3_out(master, sk,
skb, proto);
+ rcu_read_unlock();
}
return skb;
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index 8b47d3a51cf8..fd0f15d87d80 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -92,7 +92,7 @@ struct lib80211_crypto_ops {
struct lib80211_crypt_data {
struct list_head list; /* delayed deletion list */
- struct lib80211_crypto_ops *ops;
+ const struct lib80211_crypto_ops *ops;
void *priv;
atomic_t refcnt;
};
@@ -113,9 +113,9 @@ struct lib80211_crypt_info {
int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
spinlock_t *lock);
void lib80211_crypt_info_free(struct lib80211_crypt_info *info);
-int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops);
-int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops);
-struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name);
+int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops);
+int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops);
+const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name);
void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
struct lib80211_crypt_data **crypt);
diff --git a/include/net/libeth/cache.h b/include/net/libeth/cache.h
new file mode 100644
index 000000000000..bdb0c043ce61
--- /dev/null
+++ b/include/net/libeth/cache.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef __LIBETH_CACHE_H
+#define __LIBETH_CACHE_H
+
+#include <linux/cache.h>
+
+/**
+ * libeth_cacheline_group_assert - make sure cacheline group size is expected
+ * @type: type of the structure containing the group
+ * @grp: group name inside the struct
+ * @sz: expected group size
+ */
+#if defined(CONFIG_64BIT) && SMP_CACHE_BYTES == 64
+#define libeth_cacheline_group_assert(type, grp, sz) \
+ static_assert(offsetof(type, __cacheline_group_end__##grp) - \
+ offsetofend(type, __cacheline_group_begin__##grp) == \
+ (sz))
+#define __libeth_cacheline_struct_assert(type, sz) \
+ static_assert(sizeof(type) == (sz))
+#else /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
+#define libeth_cacheline_group_assert(type, grp, sz) \
+ static_assert(offsetof(type, __cacheline_group_end__##grp) - \
+ offsetofend(type, __cacheline_group_begin__##grp) <= \
+ (sz))
+#define __libeth_cacheline_struct_assert(type, sz) \
+ static_assert(sizeof(type) <= (sz))
+#endif /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
+
+#define __libeth_cls1(sz1) SMP_CACHE_ALIGN(sz1)
+#define __libeth_cls2(sz1, sz2) (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2))
+#define __libeth_cls3(sz1, sz2, sz3) \
+ (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2) + SMP_CACHE_ALIGN(sz3))
+#define __libeth_cls(...) \
+ CONCATENATE(__libeth_cls, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
+
+/**
+ * libeth_cacheline_struct_assert - make sure CL-based struct size is expected
+ * @type: type of the struct
+ * @...: from 1 to 3 CL group sizes (read-mostly, read-write, cold)
+ *
+ * When a struct contains several CL groups, it's difficult to predict its size
+ * on different architectures. The macro instead takes sizes of all of the
+ * groups the structure contains and generates the final struct size.
+ */
+#define libeth_cacheline_struct_assert(type, ...) \
+ __libeth_cacheline_struct_assert(type, __libeth_cls(__VA_ARGS__)); \
+ static_assert(__alignof(type) >= SMP_CACHE_BYTES)
+
+/**
+ * libeth_cacheline_set_assert - make sure CL-based struct layout is expected
+ * @type: type of the struct
+ * @ro: expected size of the read-mostly group
+ * @rw: expected size of the read-write group
+ * @c: expected size of the cold group
+ *
+ * Check that each group size is expected and then do final struct size check.
+ */
+#define libeth_cacheline_set_assert(type, ro, rw, c) \
+ libeth_cacheline_group_assert(type, read_mostly, ro); \
+ libeth_cacheline_group_assert(type, read_write, rw); \
+ libeth_cacheline_group_assert(type, cold, c); \
+ libeth_cacheline_struct_assert(type, ro, rw, c)
+
+#endif /* __LIBETH_CACHE_H */
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
new file mode 100644
index 000000000000..43574bd6612f
--- /dev/null
+++ b/include/net/libeth/rx.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef __LIBETH_RX_H
+#define __LIBETH_RX_H
+
+#include <linux/if_vlan.h>
+
+#include <net/page_pool/helpers.h>
+#include <net/xdp.h>
+
+/* Rx buffer management */
+
+/* Space reserved in front of each frame */
+#define LIBETH_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
+/* Maximum headroom for worst-case calculations */
+#define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM
+/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
+#define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
+/* Maximum supported L2-L4 header length */
+#define LIBETH_MAX_HEAD roundup_pow_of_two(max(MAX_HEADER, 256))
+
+/* Always use order-0 pages */
+#define LIBETH_RX_PAGE_ORDER 0
+/* Pick a sane buffer stride and align to a cacheline boundary */
+#define LIBETH_RX_BUF_STRIDE SKB_DATA_ALIGN(128)
+/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
+#define LIBETH_RX_PAGE_LEN(hr) \
+ ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER), \
+ LIBETH_RX_BUF_STRIDE)
+
+/**
+ * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
+ * @page: page holding the buffer
+ * @offset: offset from the page start (to the headroom)
+ * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
+ *
+ * Depending on the MTU, API switches between one-page-per-frame and shared
+ * page model (to conserve memory on bigger-page platforms). In case of the
+ * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```.
+ */
+struct libeth_fqe {
+ struct page *page;
+ u32 offset;
+ u32 truesize;
+} __aligned_largest;
+
+/**
+ * enum libeth_fqe_type - enum representing types of Rx buffers
+ * @LIBETH_FQE_MTU: buffer size is determined by MTU
+ * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
+ * @LIBETH_FQE_HDR: buffer size is ```LIBETH_MAX_HEAD```-sized, for headers
+ */
+enum libeth_fqe_type {
+ LIBETH_FQE_MTU = 0U,
+ LIBETH_FQE_SHORT,
+ LIBETH_FQE_HDR,
+};
+
+/**
+ * struct libeth_fq - structure representing a buffer (fill) queue
+ * @fp: hotpath part of the structure
+ * @pp: &page_pool for buffer management
+ * @fqes: array of Rx buffers
+ * @truesize: size to allocate per buffer, w/overhead
+ * @count: number of descriptors/buffers the queue has
+ * @type: type of the buffers this queue has
+ * @hsplit: flag whether header split is enabled
+ * @buf_len: HW-writeable length per each buffer
+ * @nid: ID of the closest NUMA node with memory
+ */
+struct libeth_fq {
+ struct_group_tagged(libeth_fq_fp, fp,
+ struct page_pool *pp;
+ struct libeth_fqe *fqes;
+
+ u32 truesize;
+ u32 count;
+ );
+
+ /* Cold fields */
+ enum libeth_fqe_type type:2;
+ bool hsplit:1;
+
+ u32 buf_len;
+ int nid;
+};
+
+int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
+void libeth_rx_fq_destroy(struct libeth_fq *fq);
+
+/**
+ * libeth_rx_alloc - allocate a new Rx buffer
+ * @fq: fill queue to allocate for
+ * @i: index of the buffer within the queue
+ *
+ * Return: DMA address to be passed to HW for Rx on successful allocation,
+ * ```DMA_MAPPING_ERROR``` otherwise.
+ */
+static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
+{
+ struct libeth_fqe *buf = &fq->fqes[i];
+
+ buf->truesize = fq->truesize;
+ buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
+ if (unlikely(!buf->page))
+ return DMA_MAPPING_ERROR;
+
+ return page_pool_get_dma_addr(buf->page) + buf->offset +
+ fq->pp->p.offset;
+}
+
+void libeth_rx_recycle_slow(struct page *page);
+
+/**
+ * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
+ * @fqe: buffer to process
+ * @len: frame length from the descriptor
+ *
+ * Process the buffer after it's written by HW. The regular path is to
+ * synchronize DMA for CPU, but in case of no data it will be immediately
+ * recycled back to its PP.
+ *
+ * Return: true when there's data to process, false otherwise.
+ */
+static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
+ u32 len)
+{
+ struct page *page = fqe->page;
+
+ /* Very rare, but possible case. The most common reason:
+ * the last fragment contained FCS only, which was then
+ * stripped by the HW.
+ */
+ if (unlikely(!len)) {
+ libeth_rx_recycle_slow(page);
+ return false;
+ }
+
+ page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);
+
+ return true;
+}
+
+/* Converting abstract packet type numbers into a software structure with
+ * the packet parameters to do O(1) lookup on Rx.
+ */
+
+enum {
+ LIBETH_RX_PT_OUTER_L2 = 0U,
+ LIBETH_RX_PT_OUTER_IPV4,
+ LIBETH_RX_PT_OUTER_IPV6,
+};
+
+enum {
+ LIBETH_RX_PT_NOT_FRAG = 0U,
+ LIBETH_RX_PT_FRAG,
+};
+
+enum {
+ LIBETH_RX_PT_TUNNEL_IP_NONE = 0U,
+ LIBETH_RX_PT_TUNNEL_IP_IP,
+ LIBETH_RX_PT_TUNNEL_IP_GRENAT,
+ LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
+ LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
+};
+
+enum {
+ LIBETH_RX_PT_TUNNEL_END_NONE = 0U,
+ LIBETH_RX_PT_TUNNEL_END_IPV4,
+ LIBETH_RX_PT_TUNNEL_END_IPV6,
+};
+
+enum {
+ LIBETH_RX_PT_INNER_NONE = 0U,
+ LIBETH_RX_PT_INNER_UDP,
+ LIBETH_RX_PT_INNER_TCP,
+ LIBETH_RX_PT_INNER_SCTP,
+ LIBETH_RX_PT_INNER_ICMP,
+ LIBETH_RX_PT_INNER_TIMESYNC,
+};
+
+#define LIBETH_RX_PT_PAYLOAD_NONE PKT_HASH_TYPE_NONE
+#define LIBETH_RX_PT_PAYLOAD_L2 PKT_HASH_TYPE_L2
+#define LIBETH_RX_PT_PAYLOAD_L3 PKT_HASH_TYPE_L3
+#define LIBETH_RX_PT_PAYLOAD_L4 PKT_HASH_TYPE_L4
+
+struct libeth_rx_pt {
+ u32 outer_ip:2;
+ u32 outer_frag:1;
+ u32 tunnel_type:3;
+ u32 tunnel_end_prot:2;
+ u32 tunnel_end_frag:1;
+ u32 inner_prot:3;
+ enum pkt_hash_types payload_layer:2;
+
+ u32 pad:2;
+ enum xdp_rss_hash_type hash_type:16;
+};
+
+void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
+
+/**
+ * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
+ * @pt: packet type params
+ *
+ * Wrapper to compile out the IPv6 code from the drivers when not supported
+ * by the kernel.
+ *
+ * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
+ */
+static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
+{
+#if !IS_ENABLED(CONFIG_IPV6)
+ switch (pt.outer_ip) {
+ case LIBETH_RX_PT_OUTER_IPV4:
+ return LIBETH_RX_PT_OUTER_IPV4;
+ default:
+ return LIBETH_RX_PT_OUTER_L2;
+ }
+#else
+ return pt.outer_ip;
+#endif
+}
+
+/* libeth_has_*() can be used to quickly check whether the HW metadata is
+ * available to avoid further expensive processing such as descriptor reads.
+ * They already check for the corresponding netdev feature to be enabled,
+ * thus can be used as drop-in replacements.
+ */
+
+static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
+ struct libeth_rx_pt pt)
+{
+ /* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
+ * it is enough to check only for the L4 type.
+ */
+ return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
+ (dev->features & NETIF_F_RXCSUM));
+}
+
+static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
+ struct libeth_rx_pt pt)
+{
+ return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
+ (dev->features & NETIF_F_RXHASH));
+}
+
+/**
+ * libeth_rx_pt_set_hash - fill in skb hash value basing on the PT
+ * @skb: skb to fill the hash in
+ * @hash: 32-bit hash value from the descriptor
+ * @pt: packet type
+ */
+static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
+ struct libeth_rx_pt pt)
+{
+ skb_set_hash(skb, hash, pt.payload_layer);
+}
+
+#endif /* __LIBETH_RX_H */
diff --git a/include/net/libeth/tx.h b/include/net/libeth/tx.h
new file mode 100644
index 000000000000..35614f9523f6
--- /dev/null
+++ b/include/net/libeth/tx.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef __LIBETH_TX_H
+#define __LIBETH_TX_H
+
+#include <linux/skbuff.h>
+
+#include <net/libeth/types.h>
+
+/* Tx buffer completion */
+
+/**
+ * enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion
+ * @LIBETH_SQE_EMPTY: unused/empty, no action required
+ * @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required
+ * @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree()
+ * @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA
+ * @LIBETH_SQE_SKB: &sk_buff, unmap and napi_consume_skb(), update stats
+ */
+enum libeth_sqe_type {
+ LIBETH_SQE_EMPTY = 0U,
+ LIBETH_SQE_CTX,
+ LIBETH_SQE_SLAB,
+ LIBETH_SQE_FRAG,
+ LIBETH_SQE_SKB,
+};
+
+/**
+ * struct libeth_sqe - represents a Send Queue Element / Tx buffer
+ * @type: type of the buffer, see the enum above
+ * @rs_idx: index of the last buffer from the batch this one was sent in
+ * @raw: slab buffer to free via kfree()
+ * @skb: &sk_buff to consume
+ * @dma: DMA address to unmap
+ * @len: length of the mapped region to unmap
+ * @nr_frags: number of frags in the frame this buffer belongs to
+ * @packets: number of physical packets sent for this frame
+ * @bytes: number of physical bytes sent for this frame
+ * @priv: driver-private scratchpad
+ */
+struct libeth_sqe {
+ enum libeth_sqe_type type:32;
+ u32 rs_idx;
+
+ union {
+ void *raw;
+ struct sk_buff *skb;
+ };
+
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+
+ u32 nr_frags;
+ u32 packets;
+ u32 bytes;
+
+ unsigned long priv;
+} __aligned_largest;
+
+/**
+ * LIBETH_SQE_CHECK_PRIV - check the driver's private SQE data
+ * @p: type or name of the object the driver wants to fit into &libeth_sqe
+ *
+ * Make sure the driver's private data fits into libeth_sqe::priv. To be used
+ * right after its declaration.
+ */
+#define LIBETH_SQE_CHECK_PRIV(p) \
+ static_assert(sizeof(p) <= sizeof_field(struct libeth_sqe, priv))
+
+/**
+ * struct libeth_cq_pp - completion queue poll params
+ * @dev: &device to perform DMA unmapping
+ * @ss: onstack NAPI stats to fill
+ * @napi: whether it's called from the NAPI context
+ *
+ * libeth uses this structure to access objects needed for performing full
+ * Tx complete operation without passing lots of arguments and change the
+ * prototypes each time a new one is added.
+ */
+struct libeth_cq_pp {
+ struct device *dev;
+ struct libeth_sq_napi_stats *ss;
+
+ bool napi;
+};
+
+/**
+ * libeth_tx_complete - perform Tx completion for one SQE
+ * @sqe: SQE to complete
+ * @cp: poll params
+ *
+ * Do Tx complete for all the types of buffers, incl. freeing, unmapping,
+ * updating the stats etc.
+ */
+static inline void libeth_tx_complete(struct libeth_sqe *sqe,
+ const struct libeth_cq_pp *cp)
+{
+ switch (sqe->type) {
+ case LIBETH_SQE_EMPTY:
+ return;
+ case LIBETH_SQE_SKB:
+ case LIBETH_SQE_FRAG:
+ case LIBETH_SQE_SLAB:
+ dma_unmap_page(cp->dev, dma_unmap_addr(sqe, dma),
+ dma_unmap_len(sqe, len), DMA_TO_DEVICE);
+ break;
+ default:
+ break;
+ }
+
+ switch (sqe->type) {
+ case LIBETH_SQE_SKB:
+ cp->ss->packets += sqe->packets;
+ cp->ss->bytes += sqe->bytes;
+
+ napi_consume_skb(sqe->skb, cp->napi);
+ break;
+ case LIBETH_SQE_SLAB:
+ kfree(sqe->raw);
+ break;
+ default:
+ break;
+ }
+
+ sqe->type = LIBETH_SQE_EMPTY;
+}
+
+#endif /* __LIBETH_TX_H */
diff --git a/include/net/libeth/types.h b/include/net/libeth/types.h
new file mode 100644
index 000000000000..603825e45133
--- /dev/null
+++ b/include/net/libeth/types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef __LIBETH_TYPES_H
+#define __LIBETH_TYPES_H
+
+#include <linux/types.h>
+
+/**
+ * struct libeth_sq_napi_stats - "hot" counters to update in Tx completion loop
+ * @packets: completed frames counter
+ * @bytes: sum of bytes of completed frames above
+ * @raw: alias to access all the fields as an array
+ */
+struct libeth_sq_napi_stats {
+ union {
+ struct {
+ u32 packets;
+ u32 bytes;
+ };
+ DECLARE_FLEX_ARRAY(u32, raw);
+ };
+};
+
+#endif /* __LIBETH_TYPES_H */
diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h
index 53823d61d8b6..a4bea0f33188 100644
--- a/include/net/llc_c_st.h
+++ b/include/net/llc_c_st.h
@@ -44,8 +44,8 @@ struct llc_conn_state_trans {
};
struct llc_conn_state {
- u8 current_state;
- struct llc_conn_state_trans **transitions;
+ u8 current_state;
+ const struct llc_conn_state_trans **transitions;
};
extern struct llc_conn_state llc_conn_state_table[];
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 1d55ba7c45be..86681f29bda7 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -254,7 +254,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
}
/**
- * llc_pdu_decode_sa - extracs source address (MAC) of input frame
+ * llc_pdu_decode_sa - extracts, source address (MAC) of input frame
* @skb: input skb that source address must be extracted from it.
* @sa: pointer to source address (6 byte array).
*
diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h
index ed5b2fa40d32..fca49d483d20 100644
--- a/include/net/llc_s_st.h
+++ b/include/net/llc_s_st.h
@@ -29,8 +29,8 @@ struct llc_sap_state_trans {
};
struct llc_sap_state {
- u8 curr_state;
- struct llc_sap_state_trans **transitions;
+ u8 curr_state;
+ const struct llc_sap_state_trans **transitions;
};
/* only access to SAP state table */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 835a58ce9ca5..8e7094160206 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2025 Intel Corporation
*/
#ifndef MAC80211_H
@@ -22,7 +22,7 @@
#include <net/cfg80211.h>
#include <net/codel.h>
#include <net/ieee80211_radiotap.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/**
* DOC: Introduction
@@ -79,7 +79,7 @@
* helpers for sanity checking. Drivers must ensure all work added onto the
* mac80211 workqueue should be cancelled on the driver stop() callback.
*
- * mac80211 will flushed the workqueue upon interface removal and during
+ * mac80211 will flush the workqueue upon interface removal and during
* suspend.
*
* All work performed on the mac80211 workqueue must not acquire the RTNL lock.
@@ -138,7 +138,7 @@
* field to the frame RX timestamp and report the ack TX timestamp in the
* ieee80211_rx_status struct.
*
- * Similarly, To report hardware timestamps for Timing Measurement or Fine
+ * Similarly, to report hardware timestamps for Timing Measurement or Fine
* Timing Measurement frame TX, the driver should set the SKB's hwtstamp field
* to the frame TX timestamp and report the ack RX timestamp in the
* ieee80211_tx_status struct.
@@ -214,6 +214,10 @@ struct ieee80211_low_level_stats {
* @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel,
* this is used only with channel switching with CSA
* @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed
+ * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider
+ * bandwidth) OFDMA settings need to be changed
+ * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap
+ * was changed.
*/
enum ieee80211_chanctx_change {
IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0),
@@ -221,6 +225,19 @@ enum ieee80211_chanctx_change {
IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2),
IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3),
IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4),
+ IEEE80211_CHANCTX_CHANGE_AP = BIT(5),
+ IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(6),
+};
+
+/**
+ * struct ieee80211_chan_req - A channel "request"
+ * @oper: channel definition to use for operation
+ * @ap: the channel definition of the AP, if any
+ * (otherwise the chan member is %NULL)
+ */
+struct ieee80211_chan_req {
+ struct cfg80211_chan_def oper;
+ struct cfg80211_chan_def ap;
};
/**
@@ -231,6 +248,9 @@ enum ieee80211_chanctx_change {
*
* @def: the channel definition
* @min_def: the minimum channel definition currently required.
+ * @ap: the channel definition the AP actually is operating as,
+ * for use with (wider bandwidth) OFDMA
+ * @radio_idx: index of the wiphy radio used used for this channel
* @rx_chains_static: The number of RX chains that must always be
* active on the channel to receive MIMO transmissions
* @rx_chains_dynamic: The number of RX chains that must be enabled
@@ -243,7 +263,9 @@ enum ieee80211_chanctx_change {
struct ieee80211_chanctx_conf {
struct cfg80211_chan_def def;
struct cfg80211_chan_def min_def;
+ struct cfg80211_chan_def ap;
+ int radio_idx;
u8 rx_chains_static, rx_chains_dynamic;
bool radar_enabled;
@@ -340,7 +362,9 @@ struct ieee80211_vif_chanctx_switch {
* @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed.
* @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response
* status changed.
- * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed.
+ * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed.
+ * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed
+ * @BSS_CHANGED_TPE: transmit power envelope changed
*/
enum ieee80211_bss_change {
BSS_CHANGED_ASSOC = 1<<0,
@@ -374,8 +398,10 @@ enum ieee80211_bss_change {
BSS_CHANGED_HE_OBSS_PD = 1<<28,
BSS_CHANGED_HE_BSS_COLOR = 1<<29,
BSS_CHANGED_FILS_DISCOVERY = 1<<30,
- BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
- BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32),
+ BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = BIT_ULL(31),
+ BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33),
+ BSS_CHANGED_MLD_TTLM = BIT_ULL(34),
+ BSS_CHANGED_TPE = BIT_ULL(35),
/* when adding here, make sure to change ieee80211_reconfig */
};
@@ -474,9 +500,9 @@ struct ieee80211_ba_event {
/**
* struct ieee80211_event - event to be sent to the driver
* @type: The event itself. See &enum ieee80211_event_type.
- * @rssi: relevant if &type is %RSSI_EVENT
- * @mlme: relevant if &type is %AUTH_EVENT
- * @ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT
+ * @u.rssi: relevant if &type is %RSSI_EVENT
+ * @u.mlme: relevant if &type is %AUTH_EVENT
+ * @u.ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT
* @u:union holding the fields above
*/
struct ieee80211_event {
@@ -528,6 +554,39 @@ struct ieee80211_fils_discovery {
u32 max_interval;
};
+#define IEEE80211_TPE_EIRP_ENTRIES_320MHZ 5
+struct ieee80211_parsed_tpe_eirp {
+ bool valid;
+ s8 power[IEEE80211_TPE_EIRP_ENTRIES_320MHZ];
+ u8 count;
+};
+
+#define IEEE80211_TPE_PSD_ENTRIES_320MHZ 16
+struct ieee80211_parsed_tpe_psd {
+ bool valid;
+ s8 power[IEEE80211_TPE_PSD_ENTRIES_320MHZ];
+ u8 count, n;
+};
+
+/**
+ * struct ieee80211_parsed_tpe - parsed transmit power envelope information
+ * @max_local: maximum local EIRP, one value for 20, 40, 80, 160, 320 MHz each
+ * (indexed by TX power category)
+ * @max_reg_client: maximum regulatory client EIRP, one value for 20, 40, 80,
+ * 160, 320 MHz each
+ * (indexed by TX power category)
+ * @psd_local: maximum local power spectral density, one value for each 20 MHz
+ * subchannel per bss_conf's chanreq.oper
+ * (indexed by TX power category)
+ * @psd_reg_client: maximum regulatory power spectral density, one value for
+ * each 20 MHz subchannel per bss_conf's chanreq.oper
+ * (indexed by TX power category)
+ */
+struct ieee80211_parsed_tpe {
+ struct ieee80211_parsed_tpe_eirp max_local[2], max_reg_client[2];
+ struct ieee80211_parsed_tpe_psd psd_local[2], psd_reg_client[2];
+};
+
/**
* struct ieee80211_bss_conf - holds the BSS's changing parameters
*
@@ -535,12 +594,14 @@ struct ieee80211_fils_discovery {
* to that BSS) that can change during the lifetime of the BSS.
*
* @vif: reference to owning VIF
+ * @bss: the cfg80211 bss descriptor. Valid only for a station, and only
+ * when associated. Note: This contains information which is not
+ * necessarily authenticated. For example, information coming from probe
+ * responses.
* @addr: (link) address used locally
* @link_id: link ID, or 0 for non-MLO
* @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
* @uora_exists: is the UORA element advertised by AP
- * @ack_enabled: indicates support to receive a multi-TID that solicits either
- * ACK, BACK or both
* @uora_ocw_range: UORA element's OCW Range field
* @frame_time_rts_th: HE duration RTS threshold, in units of 32us
* @he_support: does this BSS support HE
@@ -581,7 +642,7 @@ struct ieee80211_fils_discovery {
* @mcast_rate: per-band multicast rate index + 1 (0: disabled)
* @bssid: The BSSID for this BSS
* @enable_beacon: whether beaconing should be enabled or not
- * @chandef: Channel definition for this BSS -- the hardware might be
+ * @chanreq: Channel request for this BSS -- the hardware might be
* configured a higher bandwidth than this BSS uses, for example.
* @mu_group: VHT MU-MIMO group membership data
* @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation.
@@ -638,24 +699,17 @@ struct ieee80211_fils_discovery {
* @beacon_tx_rate: The configured beacon transmit rate that needs to be passed
* to driver when rate control is offloaded to firmware.
* @power_type: power type of BSS for 6 GHz
- * @tx_pwr_env: transmit power envelope array of BSS.
- * @tx_pwr_env_num: number of @tx_pwr_env.
+ * @tpe: transmit power envelope information
* @pwr_reduction: power constraint of BSS.
* @eht_support: does this BSS support EHT
- * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS
- * @csa_active: marks whether a channel switch is going on. Internally it is
- * write-protected by sdata_lock and local->mtx so holding either is fine
- * for read access.
- * @csa_punct_bitmap: new puncturing bitmap for channel switch
+ * @csa_active: marks whether a channel switch is going on.
* @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @chanctx_conf: The channel context this interface is assigned to, or %NULL
* when it is not assigned. This pointer is RCU-protected due to the TX
* path needing to access it; even though the netdev carrier will always
* be off when it is %NULL there can still be races and packets could be
* processed after it switches back to %NULL.
- * @color_change_active: marks whether a color change is ongoing. Internally it is
- * write-protected by sdata_lock and local->mtx so holding either is fine
- * for read access.
+ * @color_change_active: marks whether a color change is ongoing.
* @color_change_color: the bss color that will be used after the change.
* @ht_ldpc: in AP mode, indicates interface has HT LDPC capability.
* @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability.
@@ -683,9 +737,13 @@ struct ieee80211_fils_discovery {
* beamformee
* @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU
* beamformer
+ * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the
+ * reception of an EHT TB PPDU on an RU that spans the entire PPDU
+ * bandwidth
*/
struct ieee80211_bss_conf {
struct ieee80211_vif *vif;
+ struct cfg80211_bss *bss;
const u8 *bssid;
unsigned int link_id;
@@ -718,7 +776,7 @@ struct ieee80211_bss_conf {
u32 cqm_rssi_hyst;
s32 cqm_rssi_low;
s32 cqm_rssi_high;
- struct cfg80211_chan_def chandef;
+ struct ieee80211_chan_req chanreq;
struct ieee80211_mu_group_data mu_group;
bool qos;
bool hidden_ssid;
@@ -747,14 +805,13 @@ struct ieee80211_bss_conf {
u32 unsol_bcast_probe_resp_interval;
struct cfg80211_bitrate_mask beacon_tx_rate;
enum ieee80211_ap_reg_power power_type;
- struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT];
- u8 tx_pwr_env_num;
+
+ struct ieee80211_parsed_tpe tpe;
+
u8 pwr_reduction;
bool eht_support;
- u16 eht_puncturing;
bool csa_active;
- u16 csa_punct_bitmap;
bool mu_mimo_owner;
struct ieee80211_chanctx_conf __rcu *chanctx_conf;
@@ -776,6 +833,7 @@ struct ieee80211_bss_conf {
bool eht_su_beamformer;
bool eht_su_beamformee;
bool eht_mu_beamformer;
+ bool eht_80mhz_full_bw_ul_mumimo;
};
/**
@@ -1086,6 +1144,11 @@ struct ieee80211_tx_rate {
#define IEEE80211_MAX_TX_RETRY 31
+static inline bool ieee80211_rate_valid(struct ieee80211_tx_rate *rate)
+{
+ return rate->idx >= 0 && rate->count > 0;
+}
+
static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate,
u8 mcs, u8 nss)
{
@@ -1119,7 +1182,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
* not valid if the interface is an MLD since we won't know which
* link the frame will be transmitted on
* @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
- * @ack_frame_id: internal frame ID for TX status, used internally
+ * @status_data: internal data for TX status handling, assigned privately,
+ * see also &enum ieee80211_status_data for the internal documentation
+ * @status_data_idr: indicates status data is IDR allocated ID for ack frame
* @tx_time_est: TX time estimate in units of 4us, used internally
* @control: union part for control data
* @control.rates: TX rates array to try
@@ -1149,20 +1214,16 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
* @ack: union part for pure ACK data
* @ack.cookie: cookie for the ACK
* @driver_data: array of driver_data pointers
- * @ampdu_ack_len: number of acked aggregated frames.
- * relevant only if IEEE80211_TX_STAT_AMPDU was set.
- * @ampdu_len: number of aggregated frames.
- * relevant only if IEEE80211_TX_STAT_AMPDU was set.
- * @ack_signal: signal strength of the ACK frame
*/
struct ieee80211_tx_info {
/* common information */
u32 flags;
u32 band:3,
- ack_frame_id:13,
+ status_data_idr:1,
+ status_data:13,
hw_queue:4,
tx_time_est:10;
- /* 2 free bits */
+ /* 1 free bit */
union {
struct {
@@ -1176,7 +1237,11 @@ struct ieee80211_tx_info {
u8 use_cts_prot:1;
u8 short_preamble:1;
u8 skip_table:1;
- /* 2 bytes free */
+
+ /* for injection only (bitmap) */
+ u8 antennas:2;
+
+ /* 14 bits free */
};
/* only needed before rate control */
unsigned long jiffies;
@@ -1356,6 +1421,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* the frame.
* @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on
* the frame.
+ * @RX_FLAG_MACTIME: The timestamp passed in the RX status (@mactime
+ * field) is valid if this field is non-zero, and the position
+ * where the timestamp was sampled depends on the value.
* @RX_FLAG_MACTIME_START: The timestamp passed in the RX status (@mactime
* field) is valid and contains the time the first symbol of the MPDU
* was received. This is useful in monitor mode and for proper IBSS
@@ -1365,6 +1433,11 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* (including FCS) was received.
* @RX_FLAG_MACTIME_PLCP_START: The timestamp passed in the RX status (@mactime
* field) is valid and contains the time the SYNC preamble was received.
+ * @RX_FLAG_MACTIME_IS_RTAP_TS64: The timestamp passed in the RX status @mactime
+ * is only for use in the radiotap timestamp header, not otherwise a valid
+ * @mactime value. Note this is a separate flag so that we continue to see
+ * %RX_FLAG_MACTIME as unset. Also note that in this case the timestamp is
+ * reported to be 64 bits wide, not just 32.
* @RX_FLAG_NO_SIGNAL_VAL: The signal strength value is not present.
* Valid only for data frames (mainly A-MPDU)
* @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference
@@ -1435,12 +1508,12 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
RX_FLAG_DECRYPTED = BIT(1),
- RX_FLAG_MACTIME_PLCP_START = BIT(2),
+ RX_FLAG_ONLY_MONITOR = BIT(2),
RX_FLAG_MMIC_STRIPPED = BIT(3),
RX_FLAG_IV_STRIPPED = BIT(4),
RX_FLAG_FAILED_FCS_CRC = BIT(5),
RX_FLAG_FAILED_PLCP_CRC = BIT(6),
- RX_FLAG_MACTIME_START = BIT(7),
+ RX_FLAG_MACTIME_IS_RTAP_TS64 = BIT(7),
RX_FLAG_NO_SIGNAL_VAL = BIT(8),
RX_FLAG_AMPDU_DETAILS = BIT(9),
RX_FLAG_PN_VALIDATED = BIT(10),
@@ -1449,8 +1522,10 @@ enum mac80211_rx_flags {
RX_FLAG_AMPDU_IS_LAST = BIT(13),
RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(14),
RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(15),
- RX_FLAG_MACTIME_END = BIT(16),
- RX_FLAG_ONLY_MONITOR = BIT(17),
+ RX_FLAG_MACTIME = BIT(16) | BIT(17),
+ RX_FLAG_MACTIME_PLCP_START = 1 << 16,
+ RX_FLAG_MACTIME_START = 2 << 16,
+ RX_FLAG_MACTIME_END = 3 << 16,
RX_FLAG_SKIP_MONITOR = BIT(18),
RX_FLAG_AMSDU_MORE = BIT(19),
RX_FLAG_RADIOTAP_TLV_AT_END = BIT(20),
@@ -1733,8 +1808,9 @@ struct ieee80211_conf {
* @chandef: the new channel to switch to
* @count: the number of TBTT's until the channel switch event
* @delay: maximum delay between the time the AP transmitted the last beacon in
- * current channel and the expected time of the first beacon in the new
- * channel, expressed in TU.
+ * current channel and the expected time of the first beacon in the new
+ * channel, expressed in TU.
+ * @link_id: the link ID of the link doing the channel switch, 0 for non-MLO
*/
struct ieee80211_channel_switch {
u64 timestamp;
@@ -1742,6 +1818,7 @@ struct ieee80211_channel_switch {
bool block_tx;
struct cfg80211_chan_def chandef;
u8 count;
+ u8 link_id;
u32 delay;
};
@@ -1761,15 +1838,20 @@ struct ieee80211_channel_switch {
* @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes
* and send P2P_PS notification to the driver if NOA changed, even
* this is not pure P2P vif.
- * @IEEE80211_VIF_DISABLE_SMPS_OVERRIDE: disable user configuration of
- * SMPS mode via debugfs.
+ * @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is
+ * enabled for the interface.
+ * @IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW: Ignore wider bandwidth OFDMA
+ * operation on this interface and request a channel context without
+ * the AP definition. Use this e.g. because the device is able to
+ * handle OFDMA (downlink and trigger for uplink) on a per-AP basis.
*/
enum ieee80211_vif_flags {
IEEE80211_VIF_BEACON_FILTER = BIT(0),
IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1),
IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2),
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
- IEEE80211_VIF_DISABLE_SMPS_OVERRIDE = BIT(4),
+ IEEE80211_VIF_EML_ACTIVE = BIT(4),
+ IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW = BIT(5),
};
@@ -1799,9 +1881,11 @@ enum ieee80211_offload_flags {
* @ps: power-save mode (STA only). This flag is NOT affected by
* offchannel/dynamic_ps operations.
* @aid: association ID number, valid only when @assoc is true
- * @eml_cap: EML capabilities as described in P802.11be_D2.2 Figure 9-1002k.
+ * @eml_cap: EML capabilities as described in P802.11be_D4.1 Figure 9-1001j.
* @eml_med_sync_delay: Medium Synchronization delay as described in
- * P802.11be_D2.2 Figure 9-1002j.
+ * P802.11be_D4.1 Figure 9-1001i.
+ * @mld_capa_op: MLD Capabilities and Operations per P802.11be_D4.1
+ * Figure 9-1001k
* @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The
* may filter ARP queries targeted for other addresses than listed here.
* The driver must allow ARP queries targeted for all address listed here
@@ -1826,6 +1910,7 @@ struct ieee80211_vif_cfg {
u16 aid;
u16 eml_cap;
u16 eml_med_sync_delay;
+ u16 mld_capa_op;
__be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
int arp_addr_cnt;
@@ -1836,6 +1921,35 @@ struct ieee80211_vif_cfg {
u8 ap_addr[ETH_ALEN] __aligned(2);
};
+#define IEEE80211_TTLM_NUM_TIDS 8
+
+/**
+ * struct ieee80211_neg_ttlm - negotiated TID to link map info
+ *
+ * @downlink: bitmap of active links per TID for downlink, or 0 if mapping for
+ * this TID is not included.
+ * @uplink: bitmap of active links per TID for uplink, or 0 if mapping for this
+ * TID is not included.
+ * @valid: info is valid or not.
+ */
+struct ieee80211_neg_ttlm {
+ u16 downlink[IEEE80211_TTLM_NUM_TIDS];
+ u16 uplink[IEEE80211_TTLM_NUM_TIDS];
+ bool valid;
+};
+
+/**
+ * enum ieee80211_neg_ttlm_res - return value for negotiated TTLM handling
+ * @NEG_TTLM_RES_ACCEPT: accept the request
+ * @NEG_TTLM_RES_REJECT: reject the request
+ * @NEG_TTLM_RES_SUGGEST_PREFERRED: reject and suggest a new mapping
+ */
+enum ieee80211_neg_ttlm_res {
+ NEG_TTLM_RES_ACCEPT,
+ NEG_TTLM_RES_REJECT,
+ NEG_TTLM_RES_SUGGEST_PREFERRED
+};
+
/**
* struct ieee80211_vif - per-interface data
*
@@ -1852,8 +1966,15 @@ struct ieee80211_vif_cfg {
* @active_links: The bitmap of active links, or 0 for non-MLO.
* The driver shouldn't change this directly, but use the
* API calls meant for that purpose.
- * @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO.
- * Must be a subset of valid_links.
+ * @dormant_links: subset of the valid links that are disabled/suspended
+ * due to advertised or negotiated TTLM respectively.
+ * 0 for non-MLO.
+ * @suspended_links: subset of dormant_links representing links that are
+ * suspended due to negotiated TTLM, and could be activated in the
+ * future by tearing down the TTLM negotiation.
+ * 0 for non-MLO.
+ * @neg_ttlm: negotiated TID to link mapping info.
+ * see &struct ieee80211_neg_ttlm.
* @addr: address of this interface
* @p2p: indicates whether this AP or STA interface is a p2p
* interface, i.e. a GO or p2p-sta respectively
@@ -1891,7 +2012,8 @@ struct ieee80211_vif {
struct ieee80211_vif_cfg cfg;
struct ieee80211_bss_conf bss_conf;
struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS];
- u16 valid_links, active_links, dormant_links;
+ u16 valid_links, active_links, dormant_links, suspended_links;
+ struct ieee80211_neg_ttlm neg_ttlm;
u8 addr[ETH_ALEN] __aligned(2);
bool p2p;
@@ -1938,11 +2060,26 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif)
return vif->valid_links != 0;
}
+/**
+ * ieee80211_vif_link_active - check if a given link is active
+ * @vif: the vif
+ * @link_id: the link ID to check
+ * Return: %true if the vif is an MLD and the link is active, or if
+ * the vif is not an MLD and the link ID is 0; %false otherwise.
+ */
+static inline bool ieee80211_vif_link_active(const struct ieee80211_vif *vif,
+ unsigned int link_id)
+{
+ if (!ieee80211_vif_is_mld(vif))
+ return link_id == 0;
+ return vif->active_links & BIT(link_id);
+}
+
#define for_each_vif_active_link(vif, link, link_id) \
for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \
if ((!(vif)->active_links || \
(vif)->active_links & BIT(link_id)) && \
- (link = rcu_dereference((vif)->link_conf[link_id])))
+ (link = link_conf_dereference_check(vif, link_id)))
static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
{
@@ -1959,7 +2096,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
* This can be used by mac80211 drivers with direct cfg80211 APIs
* (like the vendor commands) that get a wdev.
*
- * Note that this function may return %NULL if the given wdev isn't
+ * Return: pointer to the wdev, or %NULL if the given wdev isn't
* associated with a vif that the driver knows about (e.g. monitor
* or AP_VLAN interfaces.)
*/
@@ -1972,25 +2109,23 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
* This can be used by mac80211 drivers with direct cfg80211 APIs
* (like the vendor commands) that needs to get the wdev for a vif.
* This can also be useful to get the netdev associated to a vif.
+ *
+ * Return: pointer to the wdev
*/
struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
-/**
- * lockdep_vif_mutex_held - for lockdep checks on link poiners
- * @vif: the interface to check
- */
-static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif)
+static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif)
{
- return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx);
+ return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->wiphy->mtx);
}
#define link_conf_dereference_protected(vif, link_id) \
rcu_dereference_protected((vif)->link_conf[link_id], \
- lockdep_vif_mutex_held(vif))
+ lockdep_vif_wiphy_mutex_held(vif))
#define link_conf_dereference_check(vif, link_id) \
rcu_dereference_check((vif)->link_conf[link_id], \
- lockdep_vif_mutex_held(vif))
+ lockdep_vif_wiphy_mutex_held(vif))
/**
* enum ieee80211_key_flags - key flags
@@ -2034,8 +2169,10 @@ static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif)
* @IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key.
* @IEEE80211_KEY_FLAG_NO_AUTO_TX: Key needs explicit Tx activation.
* @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver
- * for a AES_CMAC key to indicate that it requires sequence number
- * generation only
+ * for a AES_CMAC or a AES_GMAC key to indicate that it requires sequence
+ * number generation only
+ * @IEEE80211_KEY_FLAG_SPP_AMSDU: SPP A-MSDUs can be used with this key
+ * (set by mac80211 from the sta->spp_amsdu flag)
*/
enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0),
@@ -2049,6 +2186,7 @@ enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8),
IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9),
IEEE80211_KEY_FLAG_GENERATE_MMIE = BIT(10),
+ IEEE80211_KEY_FLAG_SPP_AMSDU = BIT(11),
};
/**
@@ -2347,9 +2485,10 @@ struct ieee80211_link_sta {
* would be assigned to link[link_id] where link_id is the id assigned
* by the AP.
* @valid_links: bitmap of valid links, or 0 for non-MLO
+ * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not.
*/
struct ieee80211_sta {
- u8 addr[ETH_ALEN];
+ u8 addr[ETH_ALEN] __aligned(2);
u16 aid;
u16 max_rx_aggregation_subframes;
bool wme;
@@ -2360,6 +2499,7 @@ struct ieee80211_sta {
bool tdls_initiator;
bool mfp;
bool mlo;
+ bool spp_amsdu;
u8 max_amsdu_subframes;
struct ieee80211_sta_aggregates *cur;
@@ -2397,7 +2537,7 @@ static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta)
for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \
if ((!(vif)->active_links || \
(vif)->active_links & BIT(link_id)) && \
- ((link_sta) = link_sta_dereference_protected(sta, link_id)))
+ ((link_sta) = link_sta_dereference_check(sta, link_id)))
/**
* enum sta_notify_cmd - sta notify command
@@ -2630,14 +2770,6 @@ struct ieee80211_txq {
* @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
* TDLS links.
*
- * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
- * mgd_prepare_tx() callback to be called before transmission of a
- * deauthentication frame in case the association was completed but no
- * beacon was heard. This is required in multi-channel scenarios, where the
- * virtual interface might not be given air time for the transmission of
- * the frame, as it is not synced with the AP/P2P GO yet, and thus the
- * deauthentication frame might not be transmitted.
- *
* @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
* support QoS NDP for AP probing - that's most likely a driver bug.
*
@@ -2684,6 +2816,21 @@ struct ieee80211_txq {
* @IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX: Hardware/driver handles transmitting
* multicast frames on all links, mac80211 should not do that.
*
+ * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT
+ * and connecting with a lower bandwidth instead
+ * @IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ: HW requires disabling puncturing in
+ * EHT in 5 GHz and connecting with a lower bandwidth instead
+ *
+ * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so
+ * no need to stop queues. This really should be set by a driver that
+ * implements MLO, so operation can continue on other links when one
+ * link is switching.
+ *
+ * @IEEE80211_HW_STRICT: strictly enforce certain things mandated by the spec
+ * but otherwise ignored/worked around for interoperability. This is a
+ * HW flag so drivers can opt in according to their own control, e.g. in
+ * testing.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2727,7 +2874,6 @@ enum ieee80211_hw_flags {
IEEE80211_HW_REPORTS_LOW_ACK,
IEEE80211_HW_SUPPORTS_TX_FRAG,
IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
- IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
IEEE80211_HW_BUFF_MMPDU_TXQ,
IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW,
@@ -2741,6 +2887,10 @@ enum ieee80211_hw_flags {
IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP,
IEEE80211_HW_DETECTS_COLOR_COLLISION,
IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX,
+ IEEE80211_HW_DISALLOW_PUNCTURING,
+ IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ,
+ IEEE80211_HW_HANDLES_QUIET_CSA,
+ IEEE80211_HW_STRICT,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -2829,8 +2979,6 @@ enum ieee80211_hw_flags {
* the default is _GI | _BANDWIDTH.
* Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values.
*
- * @radiotap_he: HE radiotap validity flags
- *
* @radiotap_timestamp: Information for the radiotap timestamp field; if the
* @units_pos member is set to a non-negative value then the timestamp
* field will be added and populated from the &struct ieee80211_rx_status
@@ -3073,7 +3221,7 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* The set_key() call for the %SET_KEY command should return 0 if
* the key is now in use, -%EOPNOTSUPP or -%ENOSPC if it couldn't be
* added; if you return 0 then hw_key_idx must be assigned to the
- * hardware key index, you are free to use the full u8 range.
+ * hardware key index. You are free to use the full u8 range.
*
* Note that in the case that the @IEEE80211_HW_SW_CRYPTO_CONTROL flag is
* set, mac80211 will not automatically fall back to software crypto if
@@ -3083,7 +3231,7 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* When the cmd is %DISABLE_KEY then it must succeed.
*
* Note that it is permissible to not decrypt a frame even if a key
- * for it has been uploaded to hardware, the stack will not make any
+ * for it has been uploaded to hardware. The stack will not make any
* decision based on whether a key has been uploaded or not but rather
* based on the receive flags.
*
@@ -3098,7 +3246,7 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* The update_tkip_key() call updates the driver with the new phase 1 key.
* This happens every time the iv16 wraps around (every 65536 packets). The
* set_key() call will happen only once for each key (unless the AP did
- * rekeying), it will not include a valid phase 1 key. The valid phase 1 key is
+ * rekeying); it will not include a valid phase 1 key. The valid phase 1 key is
* provided by update_tkip_key only. The trigger that makes mac80211 call this
* handler is software decryption with wrap around of iv16.
*
@@ -3125,7 +3273,7 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
*
* mac80211 has support for various powersave implementations.
*
- * First, it can support hardware that handles all powersaving by itself,
+ * First, it can support hardware that handles all powersaving by itself;
* such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS hardware
* flag. In that case, it will be told about the desired powersave mode
* with the %IEEE80211_CONF_PS flag depending on the association status.
@@ -3150,12 +3298,12 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still
* required to pass up beacons. The hardware is still required to handle
* waking up for multicast traffic; if it cannot the driver must handle that
- * as best as it can, mac80211 is too slow to do that.
+ * as best as it can; mac80211 is too slow to do that.
*
* Dynamic powersave is an extension to normal powersave in which the
* hardware stays awake for a user-specified period of time after sending a
* frame so that reply frames need not be buffered and therefore delayed to
- * the next wakeup. It's compromise of getting good enough latency when
+ * the next wakeup. It's a compromise of getting good enough latency when
* there's data traffic and still saving significantly power in idle
* periods.
*
@@ -3220,7 +3368,7 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* Note that change, for the sake of simplification, also includes information
* elements appearing or disappearing from the beacon.
*
- * Some hardware supports an "ignore list" instead, just make sure nothing
+ * Some hardware supports an "ignore list" instead. Just make sure nothing
* that was requested is on the ignore list, and include commonly changing
* information element IDs in the ignore list, for example 11 (BSS load) and
* the various vendor-assigned IEs with unknown contents (128, 129, 133-136,
@@ -3231,7 +3379,7 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* In addition to these capabilities, hardware should support notifying the
* host of changes in the beacon RSSI. This is relevant to implement roaming
* when no traffic is flowing (when traffic is flowing we see the RSSI of
- * the received data packets). This can consist in notifying the host when
+ * the received data packets). This can consist of notifying the host when
* the RSSI changes significantly or when it drops below or rises above
* configurable thresholds. In the future these thresholds will also be
* configured by mac80211 (which gets them from userspace) to implement
@@ -3378,8 +3526,8 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* period starts for any reason, @release_buffered_frames is called
* with the number of frames to be released and which TIDs they are
* to come from. In this case, the driver is responsible for setting
- * the EOSP (for uAPSD) and MORE_DATA bits in the released frames,
- * to help the @more_data parameter is passed to tell the driver if
+ * the EOSP (for uAPSD) and MORE_DATA bits in the released frames.
+ * To help the @more_data parameter is passed to tell the driver if
* there is more data on other TIDs -- the TIDs to release frames
* from are ignored since mac80211 doesn't know how many frames the
* buffers for those TIDs contain.
@@ -3428,7 +3576,7 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
* Additionally, the driver has to then use these HW queue IDs for the queue
* management functions (ieee80211_stop_queue() et al.)
*
- * The driver is free to set up the queue mappings as needed, multiple virtual
+ * The driver is free to set up the queue mappings as needed; multiple virtual
* interfaces may map to the same hardware queues if needed. The setup has to
* happen during add_interface or change_interface callbacks. For example, a
* driver supporting station+station and station+AP modes might decide to have
@@ -3652,11 +3800,16 @@ enum ieee80211_reconfig_type {
* @success: whether the frame exchange was successful, only
* used with the mgd_complete_tx() method, and then only
* valid for auth and (re)assoc.
+ * @was_assoc: set if this call is due to deauth/disassoc
+ * while just having been associated
+ * @link_id: the link id on which the frame will be TX'ed.
+ * 0 for a non-MLO connection.
*/
struct ieee80211_prep_tx_info {
u16 duration;
u16 subtype;
- u8 success:1;
+ u8 success:1, was_assoc:1;
+ int link_id;
};
/**
@@ -3880,6 +4033,10 @@ struct ieee80211_prep_tx_info {
* the station. See @sta_pre_rcu_remove if needed.
* This callback can sleep.
*
+ * @vif_add_debugfs: Drivers can use this callback to add a debugfs vif
+ * directory with its files. This callback should be within a
+ * CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep.
+ *
* @link_add_debugfs: Drivers can use this callback to add debugfs files
* when a link is added to a mac80211 vif. This callback should be within
* a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep.
@@ -4084,11 +4241,15 @@ struct ieee80211_prep_tx_info {
* This callback must be atomic.
*
* @get_et_sset_count: Ethtool API to get string-set count.
+ * Note that the wiphy mutex is not held for this callback since it's
+ * expected to return a static value.
*
* @get_et_stats: Ethtool API to get a set of u64 stats.
*
* @get_et_strings: Ethtool API to get a set of strings to describe stats
* and perhaps other supported types of ethtool data-sets.
+ * Note that the wiphy mutex is not held for this callback since it's
+ * expected to return a static value.
*
* @mgd_prepare_tx: Prepare for transmitting a management frame for association
* before associated. In multi-channel scenarios, a virtual interface is
@@ -4096,12 +4257,9 @@ struct ieee80211_prep_tx_info {
* yet it need not necessarily be given airtime, in particular since any
* transmission to a P2P GO needs to be synchronized against the GO's
* powersave state. mac80211 will call this function before transmitting a
- * management frame prior to having successfully associated to allow the
- * driver to give it channel time for the transmission, to get a response
- * and to be able to synchronize with the GO.
- * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
- * would also call this function before transmitting a deauthentication
- * frame in case that no beacon was heard from the AP/P2P GO.
+ * management frame prior to transmitting that frame to allow the driver
+ * to give it channel time for the transmission, to get a response and be
+ * able to synchronize with the GO.
* The callback will be called before each transmission and upon return
* mac80211 will transmit the frame right away.
* Additional information is passed in the &struct ieee80211_prep_tx_info
@@ -4177,7 +4335,7 @@ struct ieee80211_prep_tx_info {
* after a channel switch procedure is completed, allowing the
* driver to go back to a normal configuration.
* @abort_channel_switch: This is an optional callback that is called
- * when channel switch procedure was completed, allowing the
+ * when channel switch procedure was aborted, allowing the
* driver to go back to a normal configuration.
* @channel_switch_rx_beacon: This is an optional callback that is called
* when channel switch procedure is in progress and additional beacon with
@@ -4267,6 +4425,8 @@ struct ieee80211_prep_tx_info {
* disable background CAC/radar detection.
* @net_fill_forward_path: Called from .ndo_fill_forward_path in order to
* resolve a path for hardware flow offloading
+ * @can_activate_links: Checks if a specific active_links bitmap is
+ * supported by the driver.
* @change_vif_links: Change the valid links on an interface, note that while
* removing the old link information is still valid (link_conf pointer),
* but may immediately disappear after the function returns. The old or
@@ -4286,13 +4446,17 @@ struct ieee80211_prep_tx_info {
* flow offloading for flows originating from the vif.
* Note that the driver must not assume that the vif driver_data is valid
* at this point, since the callback can be called during netdev teardown.
+ * @can_neg_ttlm: for managed interface, requests the driver to determine
+ * if the requested TID-To-Link mapping can be accepted or not.
+ * If it's not accepted the driver may suggest a preferred mapping and
+ * modify @ttlm parameter with the suggested TID-to-Link mapping.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
struct ieee80211_tx_control *control,
struct sk_buff *skb);
int (*start)(struct ieee80211_hw *hw);
- void (*stop)(struct ieee80211_hw *hw);
+ void (*stop)(struct ieee80211_hw *hw, bool suspend);
#ifdef CONFIG_PM
int (*suspend)(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan);
int (*resume)(struct ieee80211_hw *hw);
@@ -4375,6 +4539,8 @@ struct ieee80211_ops {
int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_sta *sta);
#ifdef CONFIG_MAC80211_DEBUGFS
+ void (*vif_add_debugfs)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif);
void (*link_add_debugfs)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf,
@@ -4523,7 +4689,8 @@ struct ieee80211_ops {
struct ieee80211_prep_tx_info *info);
void (*mgd_protect_tdls_discover)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ unsigned int link_id);
int (*add_chanctx)(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx);
@@ -4561,9 +4728,11 @@ struct ieee80211_ops {
struct ieee80211_channel_switch *ch_switch);
int (*post_channel_switch)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
void (*abort_channel_switch)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_channel_switch *ch_switch);
@@ -4643,6 +4812,9 @@ struct ieee80211_ops {
struct ieee80211_sta *sta,
struct net_device_path_ctx *ctx,
struct net_device_path *path);
+ bool (*can_activate_links)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u16 active_links);
int (*change_vif_links)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
u16 old_links, u16 new_links,
@@ -4659,6 +4831,9 @@ struct ieee80211_ops {
struct net_device *dev,
enum tc_setup_type type,
void *type_data);
+ enum ieee80211_neg_ttlm_res
+ (*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_neg_ttlm *ttlm);
};
/**
@@ -4907,7 +5082,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw);
* for a single hardware must be synchronized against each other. Calls to
* this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
* mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* This function must be called with BHs disabled and RCU read lock
*
@@ -4932,7 +5107,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
* for a single hardware must be synchronized against each other. Calls to
* this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
* mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* This function must be called with BHs disabled.
*
@@ -4957,7 +5132,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
* for a single hardware must be synchronized against each other. Calls to
* this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
* mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* In process context use instead ieee80211_rx_ni().
*
@@ -4977,7 +5152,7 @@ static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
*
* Calls to this function, ieee80211_rx() or ieee80211_rx_ni() may not
* be mixed for a single hardware.Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* @hw: the hardware this frame came in on
* @skb: the buffer to receive, owned by mac80211 after this call
@@ -4992,7 +5167,7 @@ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb);
*
* Calls to this function, ieee80211_rx() and ieee80211_rx_irqsafe() may
* not be mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* @hw: the hardware this frame came in on
* @skb: the buffer to receive, owned by mac80211 after this call
@@ -5137,22 +5312,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
int max_rates);
/**
- * ieee80211_sta_set_expected_throughput - set the expected tpt for a station
- *
- * Call this function to notify mac80211 about a change in expected throughput
- * to a station. A driver for a device that does rate control in firmware can
- * call this function when the expected throughput estimate towards a station
- * changes. The information is used to tune the CoDel AQM applied to traffic
- * going towards that station (which can otherwise be too aggressive and cause
- * slow stations to starve).
- *
- * @pubsta: the station to set throughput for.
- * @thr: the current expected throughput in kbps.
- */
-void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
- u32 thr);
-
-/**
* ieee80211_tx_rate_update - transmit rate update callback
*
* Drivers should call this functions with a non-NULL pub sta
@@ -5168,7 +5327,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info);
/**
- * ieee80211_tx_status - transmit status callback
+ * ieee80211_tx_status_skb - transmit status callback
*
* Call this function for all transmitted frames after they have been
* transmitted. It is permissible to not call this function for
@@ -5183,13 +5342,13 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
* @hw: the hardware the frame was transmitted by
* @skb: the frame that was transmitted, owned by mac80211 after this call
*/
-void ieee80211_tx_status(struct ieee80211_hw *hw,
- struct sk_buff *skb);
+void ieee80211_tx_status_skb(struct ieee80211_hw *hw,
+ struct sk_buff *skb);
/**
* ieee80211_tx_status_ext - extended transmit status callback
*
- * This function can be used as a replacement for ieee80211_tx_status
+ * This function can be used as a replacement for ieee80211_tx_status_skb()
* in drivers that may want to provide extra information that does not
* fit into &struct ieee80211_tx_info.
*
@@ -5206,7 +5365,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
/**
* ieee80211_tx_status_noskb - transmit status callback without skb
*
- * This function can be used as a replacement for ieee80211_tx_status
+ * This function can be used as a replacement for ieee80211_tx_status_skb()
* in drivers that cannot reliably map tx status information back to
* specific skbs.
*
@@ -5234,9 +5393,9 @@ static inline void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
/**
* ieee80211_tx_status_ni - transmit status callback (in process context)
*
- * Like ieee80211_tx_status() but can be called in process context.
+ * Like ieee80211_tx_status_skb() but can be called in process context.
*
- * Calls to this function, ieee80211_tx_status() and
+ * Calls to this function, ieee80211_tx_status_skb() and
* ieee80211_tx_status_irqsafe() may not be mixed
* for a single hardware.
*
@@ -5247,17 +5406,17 @@ static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw,
struct sk_buff *skb)
{
local_bh_disable();
- ieee80211_tx_status(hw, skb);
+ ieee80211_tx_status_skb(hw, skb);
local_bh_enable();
}
/**
* ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback
*
- * Like ieee80211_tx_status() but can be called in IRQ context
+ * Like ieee80211_tx_status_skb() but can be called in IRQ context
* (internally defers to a tasklet.)
*
- * Calls to this function, ieee80211_tx_status() and
+ * Calls to this function, ieee80211_tx_status_skb() and
* ieee80211_tx_status_ni() may not be mixed for a single hardware.
*
* @hw: the hardware the frame was transmitted by
@@ -5441,6 +5600,7 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
/**
* ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* The beacon counter should be updated after each beacon transmission.
* This function is called implicitly when
@@ -5450,7 +5610,8 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
*
* Return: new countdown value
*/
-u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif);
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown
@@ -5468,30 +5629,34 @@ void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter);
/**
* ieee80211_csa_finish - notify mac80211 about channel switch
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* After a channel switch announcement was scheduled and the counter in this
* announcement hits 1, this function must be called by the driver to
* notify mac80211 that the channel can be changed.
*/
-void ieee80211_csa_finish(struct ieee80211_vif *vif);
+void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id);
/**
* ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
- * This function returns whether the countdown reached zero.
+ * Return: %true if the countdown reached 1, %false otherwise
*/
-bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_color_change_finish - notify mac80211 about color change
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* After a color change announcement was scheduled and the counter in this
* announcement hits 1, this function must be called by the driver to
* notify mac80211 that the color can be changed
*/
-void ieee80211_color_change_finish(struct ieee80211_vif *vif);
+void ieee80211_color_change_finish(struct ieee80211_vif *vif, u8 link_id);
/**
* ieee80211_proberesp_get - retrieve a Probe Response template
@@ -5805,12 +5970,11 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
* ieee80211_remove_key - remove the given key
* @keyconf: the parameter passed with the set key
*
+ * Context: Must be called with the wiphy mutex held.
+ *
* Remove the given key. If the key was uploaded to the hardware at the
* time this function is called, it is not deleted in the hardware but
* instead assumed to have been removed already.
- *
- * Note that due to locking considerations this function can (currently)
- * only be called during key iteration (ieee80211_iter_keys().)
*/
void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
@@ -5818,13 +5982,14 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
* ieee80211_gtk_rekey_add - add a GTK key from rekeying during WoWLAN
* @vif: the virtual interface to add the key on
* @keyconf: new key data
+ * @link_id: the link id of the key or -1 for non-MLO
*
* When GTK rekeying was done while the system was suspended, (a) new
* key(s) will be available. These will be needed by mac80211 for proper
* RX processing, so this function allows setting them.
*
- * The function returns the newly allocated key structure, which will
- * have similar contents to the passed key configuration but point to
+ * Return: the newly allocated key structure, which will have
+ * similar contents to the passed key configuration but point to
* mac80211-owned memory. In case of errors, the function returns an
* ERR_PTR(), use IS_ERR() etc.
*
@@ -5845,7 +6010,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
*/
struct ieee80211_key_conf *
ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
- struct ieee80211_key_conf *keyconf);
+ struct ieee80211_key_conf *keyconf,
+ int link_id);
/**
* ieee80211_gtk_rekey_notify - notify userspace supplicant of rekeying
@@ -6240,6 +6406,8 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
* may be %NULL if the link ID is not needed
*
* Obtain the STA by link address, must use RCU protection.
+ *
+ * Return: pointer to STA if found, otherwise %NULL.
*/
struct ieee80211_sta *
ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw,
@@ -6369,8 +6537,8 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
* @hw: pointer obtained from ieee80211_alloc_hw()
* @txq: pointer obtained from station or virtual interface
*
- * Return true if the AQL's airtime limit has not been reached and the txq can
- * continue to send more packets to the device. Otherwise return false.
+ * Return: %true if the AQL's airtime limit has not been reached and the txq can
+ * continue to send more packets to the device. Otherwise return %false.
*/
bool
ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
@@ -6382,12 +6550,12 @@ ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
* @iter: iterator function that will be called for each key
* @iter_data: custom data to pass to the iterator function
*
+ * Context: Must be called with wiphy mutex held; can sleep.
+ *
* This function can be used to iterate all the keys known to
* mac80211, even those that weren't previously programmed into
* the device. This is intended for use in WoWLAN if the device
- * needs reprogramming of the keys during suspend. Note that due
- * to locking reasons, it is also only safe to call this at few
- * spots since it must hold the RTNL and be able to sleep.
+ * needs reprogramming of the keys during suspend.
*
* The order in which the keys are iterated matches the order
* in which they were originally installed and handed to the
@@ -6570,30 +6738,34 @@ void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp);
* ieee80211_radar_detected - inform that a radar was detected
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @chanctx_conf: Channel context on which radar is detected. Mandatory to
+ * pass a valid pointer during MLO. For non-MLO %NULL can be passed
*/
-void ieee80211_radar_detected(struct ieee80211_hw *hw);
+void ieee80211_radar_detected(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf);
/**
* ieee80211_chswitch_done - Complete channel switch process
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @success: make the channel switch successful or not
+ * @link_id: the link_id on which the switch was done. Ignored if success is
+ * false.
*
* Complete the channel switch post-process: set the new operational channel
* and wake up the suspended queues.
*/
-void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success);
+void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success,
+ unsigned int link_id);
/**
* ieee80211_channel_switch_disconnect - disconnect due to channel switch error
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
- * @block_tx: if %true, do not send deauth frame.
*
* Instruct mac80211 to disconnect due to a channel switch error. The channel
* switch can request to block the tx and so, we need to make sure we do not send
* a deauth frame in this case.
*/
-void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif,
- bool block_tx);
+void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif);
/**
* ieee80211_request_smps - request SM PS transition
@@ -6870,6 +7042,8 @@ bool rate_usable_index_exists(struct ieee80211_supported_band *sband,
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @pubsta: &struct ieee80211_sta pointer to the target destination.
* @rates: new tx rate set to be used for this station.
+ *
+ * Return: 0 on success. An error code otherwise.
*/
int rate_control_set_rates(struct ieee80211_hw *hw,
struct ieee80211_sta *pubsta,
@@ -7030,6 +7204,8 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif,
* @band: the band to transmit on
* @sta: optional pointer to get the station to send the frame to
*
+ * Return: %true if the skb was prepared, %false otherwise
+ *
* Note: must be called under RCU lock
*/
bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
@@ -7046,6 +7222,8 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
*
* @skb: packet injected by userspace
* @dev: the &struct device of this 802.11 device
+ *
+ * Return: %true if the radiotap header was parsed, %false otherwise
*/
bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
struct net_device *dev);
@@ -7155,7 +7333,7 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid);
* @txq: pointer obtained from station or virtual interface, or from
* ieee80211_next_txq()
*
- * Returns the skb if successful, %NULL if no frame was available.
+ * Return: the skb if successful, %NULL if no frame was available.
*
* Note that this must be called in an rcu_read_lock() critical section,
* which can only be released after the SKB was handled. Some pointers in
@@ -7181,6 +7359,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @txq: pointer obtained from station or virtual interface, or from
* ieee80211_next_txq()
+ *
+ * Return: the skb if successful, %NULL if no frame was available.
*/
static inline struct sk_buff *ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
@@ -7212,7 +7392,7 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw,
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @ac: AC number to return packets from.
*
- * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
+ * Return: the next txq if successful, %NULL if no queue is eligible. If a txq
* is returned, it should be returned with ieee80211_return_txq() after the
* driver has finished scheduling it.
*/
@@ -7277,7 +7457,7 @@ ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
*
* This function is used to check whether given txq is allowed to transmit by
* the airtime scheduler, and can be used by drivers to access the airtime
- * fairness accounting without going using the scheduling order enfored by
+ * fairness accounting without using the scheduling order enforced by
* next_txq().
*
* Returns %true if the airtime scheduler thinks the TXQ should be allowed to
@@ -7295,6 +7475,8 @@ ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @txq: pointer obtained from station or virtual interface
+ *
+ * Return: %true if transmission is allowed, %false otherwise
*/
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq);
@@ -7355,6 +7537,8 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif,
* @status: &struct ieee80211_rx_status containing the transmission rate
* information.
* @len: frame length in bytes
+ *
+ * Return: the airtime estimate
*/
u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
struct ieee80211_rx_status *status,
@@ -7369,23 +7553,13 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @info: &struct ieee80211_tx_info of the frame.
* @len: frame length in bytes
+ *
+ * Return: the airtime estimate
*/
u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info,
int len);
/**
- * ieee80211_set_hw_80211_encap - enable hardware encapsulation offloading.
- *
- * This function is used to notify mac80211 that a vif can be passed raw 802.3
- * frames. The driver needs to then handle the 802.11 encapsulation inside the
- * hardware or firmware.
- *
- * @vif: &struct ieee80211_vif pointer from the add_interface callback.
- * @enable: indicate if the feature should be turned on or off
- */
-bool ieee80211_set_hw_80211_encap(struct ieee80211_vif *vif, bool enable);
-
-/**
* ieee80211_get_fils_discovery_tmpl - Get FILS discovery template.
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
@@ -7414,15 +7588,15 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
/**
* ieee80211_obss_color_collision_notify - notify userland about a BSS color
* collision.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
* aware of.
- * @gfp: allocation flags
*/
void
ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap, gfp_t gfp);
+ u64 color_bitmap, u8 link_id);
/**
* ieee80211_is_tx_data - check if frame is a data frame
@@ -7431,6 +7605,8 @@ ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
* hardware encapsulation enabled are data frames.
*
* @skb: the frame to be transmitted.
+ *
+ * Return: %true if @skb is a data frame, %false otherwise
*/
static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
{
@@ -7446,6 +7622,9 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* @vif: interface to set active links on
* @active_links: the new active links bitmap
*
+ * Context: Must be called with wiphy mutex held; may sleep; calls
+ * back into the driver.
+ *
* This changes the active links on an interface. The interface
* must be in client mode (in AP mode, all links are always active),
* and @active_links must be a subset of the vif's valid_links.
@@ -7453,6 +7632,7 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* If a link is switched off and another is switched on at the same
* time (e.g. active_links going from 0x1 to 0x10) then you will get
* a sequence of calls like
+ *
* - change_vif_links(0x11)
* - unassign_vif_chanctx(link_id=0)
* - change_sta_links(0x11) for each affected STA (the AP)
@@ -7463,9 +7643,7 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* - assign_vif_chanctx(link_id=4)
* - change_vif_links(0x10)
*
- * Note: This function acquires some mac80211 locks and must not
- * be called with any driver locks held that could cause a
- * lock dependency inversion. Best call it without locks.
+ * Return: 0 on success. An error code otherwise.
*/
int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
@@ -7482,4 +7660,26 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
u16 active_links);
+/**
+ * ieee80211_send_teardown_neg_ttlm - tear down a negotiated TTLM request
+ * @vif: the interface on which the tear down request should be sent.
+ *
+ * This function can be used to tear down a previously accepted negotiated
+ * TTLM request.
+ */
+void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif);
+
+/* for older drivers - let's not document these ... */
+int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx);
+void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx);
+void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx,
+ u32 changed);
+int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode);
+
#endif /* MAC80211_H */
diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index 4a3a9de9da73..d72006a85f02 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -7,7 +7,7 @@
#ifndef NET_MAC802154_H
#define NET_MAC802154_H
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <net/af_ieee802154.h>
#include <linux/ieee802154.h>
#include <linux/skbuff.h>
@@ -140,7 +140,7 @@ enum ieee802154_hw_flags {
*
* xmit_sync:
* Handler that 802.15.4 module calls for each transmitted frame.
- * skb cntains the buffer starting from the IEEE 802.15.4 header.
+ * skb contains the buffer starting from the IEEE 802.15.4 header.
* The low-level driver should send the frame based on available
* configuration. This is called by a workqueue and useful for
* synchronous 802.15.4 drivers.
@@ -152,7 +152,7 @@ enum ieee802154_hw_flags {
*
* xmit_async:
* Handler that 802.15.4 module calls for each transmitted frame.
- * skb cntains the buffer starting from the IEEE 802.15.4 header.
+ * skb contains the buffer starting from the IEEE 802.15.4 header.
* The low-level driver should send the frame based on available
* configuration.
* This function should return zero or negative errno.
diff --git a/include/net/macsec.h b/include/net/macsec.h
index 75340c3e0c8b..de216cbc6b05 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -247,6 +247,23 @@ struct macsec_secy {
/**
* struct macsec_context - MACsec context for hardware offloading
+ * @netdev: a valid pointer to a struct net_device if @offload ==
+ * MACSEC_OFFLOAD_MAC
+ * @phydev: a valid pointer to a struct phy_device if @offload ==
+ * MACSEC_OFFLOAD_PHY
+ * @offload: MACsec offload status
+ * @secy: pointer to a MACsec SecY
+ * @rx_sc: pointer to a RX SC
+ * @update_pn: when updating the SA, update the next PN
+ * @assoc_num: association number of the target SA
+ * @key: key of the target SA
+ * @rx_sa: pointer to an RX SA if a RX SA is added/updated/removed
+ * @tx_sa: pointer to an TX SA if a TX SA is added/updated/removed
+ * @tx_sc_stats: pointer to TX SC stats structure
+ * @tx_sa_stats: pointer to TX SA stats structure
+ * @rx_sc_stats: pointer to RX SC stats structure
+ * @rx_sa_stats: pointer to RX SA stats structure
+ * @dev_stats: pointer to dev stats structure
*/
struct macsec_context {
union {
@@ -277,6 +294,34 @@ struct macsec_context {
/**
* struct macsec_ops - MACsec offloading operations
+ * @mdo_dev_open: called when the MACsec interface transitions to the up state
+ * @mdo_dev_stop: called when the MACsec interface transitions to the down
+ * state
+ * @mdo_add_secy: called when a new SecY is added
+ * @mdo_upd_secy: called when the SecY flags are changed or the MAC address of
+ * the MACsec interface is changed
+ * @mdo_del_secy: called when the hw offload is disabled or the MACsec
+ * interface is removed
+ * @mdo_add_rxsc: called when a new RX SC is added
+ * @mdo_upd_rxsc: called when a certain RX SC is updated
+ * @mdo_del_rxsc: called when a certain RX SC is removed
+ * @mdo_add_rxsa: called when a new RX SA is added
+ * @mdo_upd_rxsa: called when a certain RX SA is updated
+ * @mdo_del_rxsa: called when a certain RX SA is removed
+ * @mdo_add_txsa: called when a new TX SA is added
+ * @mdo_upd_txsa: called when a certain TX SA is updated
+ * @mdo_del_txsa: called when a certain TX SA is removed
+ * @mdo_get_dev_stats: called when dev stats are read
+ * @mdo_get_tx_sc_stats: called when TX SC stats are read
+ * @mdo_get_tx_sa_stats: called when TX SA stats are read
+ * @mdo_get_rx_sc_stats: called when RX SC stats are read
+ * @mdo_get_rx_sa_stats: called when RX SA stats are read
+ * @mdo_insert_tx_tag: called to insert the TX tag
+ * @needed_headroom: number of bytes reserved at the beginning of the sk_buff
+ * for the TX tag
+ * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the
+ * TX tag
+ * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst
*/
struct macsec_ops {
/* Device wide */
@@ -303,6 +348,11 @@ struct macsec_ops {
int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx);
int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx);
int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx);
+ /* Offload tag */
+ int (*mdo_insert_tx_tag)(struct phy_device *phydev,
+ struct sk_buff *skb);
+ unsigned int needed_headroom;
+ unsigned int needed_tailroom;
bool rx_uses_md_dst;
};
@@ -326,4 +376,9 @@ static inline void *macsec_netdev_priv(const struct net_device *dev)
return netdev_priv(dev);
}
+static inline u64 sci_to_cpu(sci_t sci)
+{
+ return be64_to_cpu((__force __be64)sci);
+}
+
#endif /* _NET_MACSEC_H_ */
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 3965343fdee0..6a0e83ac0fdb 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -60,12 +60,14 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_DONE = 131,
GDMA_EQE_HWC_SOC_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
+ GDMA_EQE_RNIC_QP_FATAL = 176,
};
enum {
GDMA_DEVICE_NONE = 0,
GDMA_DEVICE_HWC = 1,
GDMA_DEVICE_MANA = 2,
+ GDMA_DEVICE_MANA_IB = 3,
};
struct gdma_resource {
@@ -149,6 +151,7 @@ struct gdma_general_req {
#define GDMA_MESSAGE_V1 1
#define GDMA_MESSAGE_V2 2
+#define GDMA_MESSAGE_V3 3
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
@@ -301,6 +304,7 @@ struct gdma_queue {
u32 head;
u32 tail;
+ struct list_head entry;
/* Extra fields specific to EQ/CQ. */
union {
@@ -336,6 +340,7 @@ struct gdma_queue_spec {
void *context;
unsigned long log2_throttle_limit;
+ unsigned int msix_index;
} eq;
struct {
@@ -352,7 +357,9 @@ struct gdma_queue_spec {
struct gdma_irq_context {
void (*handler)(void *arg);
- void *arg;
+ /* Protect the eq_list */
+ spinlock_t lock;
+ struct list_head eq_list;
char name[MANA_IRQ_NAME_SZ];
};
@@ -363,7 +370,6 @@ struct gdma_context {
unsigned int max_num_queues;
unsigned int max_num_msix;
unsigned int num_msix_usable;
- struct gdma_resource msix_resource;
struct gdma_irq_context *irq_contexts;
/* L2 MTU */
@@ -395,9 +401,10 @@ struct gdma_context {
/* Azure network adapter */
struct gdma_dev mana;
-};
-#define MAX_NUM_GDMA_DEVICES 4
+ /* Azure RDMA adapter */
+ struct gdma_dev mana_ib;
+};
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
{
@@ -543,11 +550,17 @@ enum {
*/
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
+#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
+
+/* Driver can handle holes (zeros) in the device list */
+#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
- GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
+ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
+ GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
#define GDMA_DRV_CAP_FLAGS2 0
@@ -608,11 +621,12 @@ struct gdma_query_max_resources_resp {
}; /* HW DATA */
/* GDMA_LIST_DEVICES */
+#define GDMA_DEV_LIST_SIZE 64
struct gdma_list_devices_resp {
struct gdma_resp_hdr hdr;
u32 num_of_devs;
u32 reserved;
- struct gdma_dev_id devs[64];
+ struct gdma_dev_id devs[GDMA_DEV_LIST_SIZE];
}; /* HW DATA */
/* GDMA_REGISTER_DEVICE */
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 3d3b5c881bc1..158b125692c2 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -121,7 +121,7 @@ struct hwc_dma_buf {
u32 gpa_mkey;
u32 num_reqs;
- struct hwc_work_request reqs[];
+ struct hwc_work_request reqs[] __counted_by(num_reqs);
};
typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id,
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 7892b79854f6..f2a5200d8a0f 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -30,17 +30,29 @@ enum TRI_STATE {
};
/* Number of entries for hardware indirection table must be in power of 2 */
-#define MANA_INDIRECT_TABLE_SIZE 64
-#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
+#define MANA_INDIRECT_TABLE_MAX_SIZE 512
+#define MANA_INDIRECT_TABLE_DEF_SIZE 64
/* The Toeplitz hash key's length in bytes: should be multiple of 8 */
#define MANA_HASH_KEY_SIZE 40
#define COMP_ENTRY_SIZE 64
-#define RX_BUFFERS_PER_QUEUE 512
+/* This Max value for RX buffers is derived from __alloc_page()'s max page
+ * allocation calculation. It allows maximum 2^(MAX_ORDER -1) pages. RX buffer
+ * size beyond this value gets rejected by __alloc_page() call.
+ */
+#define MAX_RX_BUFFERS_PER_QUEUE 8192
+#define DEF_RX_BUFFERS_PER_QUEUE 512
+#define MIN_RX_BUFFERS_PER_QUEUE 128
-#define MAX_SEND_BUFFERS_PER_QUEUE 256
+/* This max value for TX buffers is derived as the maximum allocatable
+ * pages supported on host per guest through testing. TX buffer size beyond
+ * this value is rejected by the hardware.
+ */
+#define MAX_TX_BUFFERS_PER_QUEUE 16384
+#define DEF_TX_BUFFERS_PER_QUEUE 256
+#define MIN_TX_BUFFERS_PER_QUEUE 128
#define EQ_SIZE (8 * MANA_PAGE_SIZE)
@@ -288,7 +300,7 @@ struct mana_recv_buf_oob {
void *buf_va;
bool from_pool; /* allocated from a page pool */
- /* SGL of the buffer going to be sent has part of the work request. */
+ /* SGL of the buffer going to be sent as part of the work request. */
u32 num_sge;
struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES];
@@ -342,7 +354,7 @@ struct mana_rxq {
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
*/
- struct mana_recv_buf_oob rx_oobs[];
+ struct mana_recv_buf_oob rx_oobs[] __counted_by(num_rx_buf);
};
struct mana_tx_qp {
@@ -356,6 +368,25 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
+ u64 hc_rx_discards_no_wqe;
+ u64 hc_rx_err_vport_disabled;
+ u64 hc_rx_bytes;
+ u64 hc_rx_ucast_pkts;
+ u64 hc_rx_ucast_bytes;
+ u64 hc_rx_bcast_pkts;
+ u64 hc_rx_bcast_bytes;
+ u64 hc_rx_mcast_pkts;
+ u64 hc_rx_mcast_bytes;
+ u64 hc_tx_err_gf_disabled;
+ u64 hc_tx_err_vport_disabled;
+ u64 hc_tx_err_inval_vportoffset_pkt;
+ u64 hc_tx_err_vlan_enforcement;
+ u64 hc_tx_err_eth_type_enforcement;
+ u64 hc_tx_err_sa_enforcement;
+ u64 hc_tx_err_sqpdid_enforcement;
+ u64 hc_tx_err_cqpdid_enforcement;
+ u64 hc_tx_err_mtu_violation;
+ u64 hc_tx_err_inval_oob;
u64 hc_tx_bytes;
u64 hc_tx_ucast_pkts;
u64 hc_tx_ucast_bytes;
@@ -363,6 +394,7 @@ struct mana_ethtool_stats {
u64 hc_tx_bcast_bytes;
u64 hc_tx_mcast_pkts;
u64 hc_tx_mcast_bytes;
+ u64 hc_tx_err_gdma;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
u64 rx_coalesced_err;
@@ -394,10 +426,11 @@ struct mana_port_context {
struct mana_tx_qp *tx_qp;
/* Indirection Table for RX & TX. The values are queue indexes */
- u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
+ u32 *indir_table;
+ u32 indir_table_sz;
/* Indirection table containing RxObject Handles */
- mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
+ mana_handle_t *rxobj_table;
/* Hash key used by the NIC */
u8 hashkey[MANA_HASH_KEY_SIZE];
@@ -419,6 +452,9 @@ struct mana_port_context {
unsigned int max_queues;
unsigned int num_queues;
+ unsigned int rx_queue_size;
+ unsigned int tx_queue_size;
+
mana_handle_t port_handle;
mana_handle_t pf_filter_handle;
@@ -454,6 +490,8 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
void mana_query_gf_stats(struct mana_port_context *apc);
+int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
+void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
@@ -605,8 +643,8 @@ struct mana_query_gf_stat_resp {
struct gdma_resp_hdr hdr;
u64 reported_stats;
/* rx errors/discards */
- u64 discard_rx_nowqe;
- u64 err_rx_vport_disabled;
+ u64 rx_discards_nowqe;
+ u64 rx_err_vport_disabled;
/* rx bytes/packets */
u64 hc_rx_bytes;
u64 hc_rx_ucast_pkts;
@@ -616,16 +654,16 @@ struct mana_query_gf_stat_resp {
u64 hc_rx_mcast_pkts;
u64 hc_rx_mcast_bytes;
/* tx errors */
- u64 err_tx_gf_disabled;
- u64 err_tx_vport_disabled;
- u64 err_tx_inval_vport_offset_pkt;
- u64 err_tx_vlan_enforcement;
- u64 err_tx_ethtype_enforcement;
- u64 err_tx_SA_enforecement;
- u64 err_tx_SQPDID_enforcement;
- u64 err_tx_CQPDID_enforcement;
- u64 err_tx_mtu_violation;
- u64 err_tx_inval_oob;
+ u64 tx_err_gf_disabled;
+ u64 tx_err_vport_disabled;
+ u64 tx_err_inval_vport_offset_pkt;
+ u64 tx_err_vlan_enforcement;
+ u64 tx_err_ethtype_enforcement;
+ u64 tx_err_SA_enforcement;
+ u64 tx_err_SQPDID_enforcement;
+ u64 tx_err_CQPDID_enforcement;
+ u64 tx_err_mtu_violation;
+ u64 tx_err_inval_oob;
/* tx bytes/packets */
u64 hc_tx_bytes;
u64 hc_tx_ucast_pkts;
@@ -635,7 +673,7 @@ struct mana_query_gf_stat_resp {
u64 hc_tx_mcast_pkts;
u64 hc_tx_mcast_bytes;
/* tx error */
- u64 err_tx_gdma;
+ u64 tx_err_gdma;
}; /* HW DATA */
/* Configure vPort Rx Steering */
@@ -654,6 +692,7 @@ struct mana_cfg_rx_steer_req_v2 {
u8 hashkey[MANA_HASH_KEY_SIZE];
u8 cqe_coalescing_enable;
u8 reserved2[7];
+ mana_handle_t indir_tab[] __counted_by(num_indir_entries);
}; /* HW DATA */
struct mana_cfg_rx_steer_resp {
@@ -779,4 +818,6 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
u32 doorbell_pg_id);
void mana_uncfg_vport(struct mana_port_context *apc);
+
+struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index);
#endif /* _MANA_H */
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 1eb1b4393e46..28d59ae94ca3 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -87,7 +87,7 @@ struct mctp_sock {
};
/* Key for matching incoming packets to sockets or reassembly contexts.
- * Packets are matched on (src,dest,tag).
+ * Packets are matched on (peer EID, local EID, tag).
*
* Lifetime / locking requirements:
*
@@ -133,6 +133,7 @@ struct mctp_sock {
* - through an expiry timeout, on a per-socket timer
*/
struct mctp_sk_key {
+ unsigned int net;
mctp_eid_t peer_addr;
mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */
__u8 tag; /* incoming tag match; invert TO for local */
@@ -255,7 +256,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
void mctp_key_unref(struct mctp_sk_key *key);
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t daddr, mctp_eid_t saddr,
+ unsigned int netid,
+ mctp_eid_t local, mctp_eid_t peer,
bool manual, u8 *tagp);
/* routing <--> device interface */
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index fb996124b3d5..814b5f2e3ed5 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -97,6 +97,9 @@ struct mptcp_out_options {
};
#define MPTCP_SCHED_NAME_MAX 16
+#define MPTCP_SCHED_MAX 128
+#define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX)
+
#define MPTCP_SUBFLOWS_MAX 8
struct mptcp_sched_data {
@@ -220,6 +223,8 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
return htonl(0u);
}
+
+void mptcp_active_detect_blackhole(struct sock *sk, bool expired);
#else
static inline void mptcp_init(void)
@@ -304,6 +309,8 @@ static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct reques
}
static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); }
+
+static inline void mptcp_active_detect_blackhole(struct sock *sk, bool expired) { }
#endif /* CONFIG_MPTCP */
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 9bbdf6eaa942..3c88d5bc5eed 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -147,11 +147,6 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
* The following hooks can be defined; unless noted otherwise, they are
* optional and can be filled with a null pointer.
*
- * int (*is_useropt)(u8 nd_opt_type):
- * This function is called when IPv6 decide RA userspace options. if
- * this function returns 1 then the option given by nd_opt_type will
- * be handled as userspace option additional to the IPv6 options.
- *
* int (*parse_options)(const struct net_device *dev,
* struct nd_opt_hdr *nd_opt,
* struct ndisc_options *ndopts):
@@ -200,7 +195,6 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data,
* addresses. E.g. 802.15.4 6LoWPAN.
*/
struct ndisc_ops {
- int (*is_useropt)(u8 nd_opt_type);
int (*parse_options)(const struct net_device *dev,
struct nd_opt_hdr *nd_opt,
struct ndisc_options *ndopts);
@@ -224,15 +218,6 @@ struct ndisc_ops {
};
#if IS_ENABLED(CONFIG_IPV6)
-static inline int ndisc_ops_is_useropt(const struct net_device *dev,
- u8 nd_opt_type)
-{
- if (dev->ndisc_ops && dev->ndisc_ops->is_useropt)
- return dev->ndisc_ops->is_useropt(nd_opt_type);
- else
- return 0;
-}
-
static inline int ndisc_ops_parse_options(const struct net_device *dev,
struct nd_opt_hdr *nd_opt,
struct ndisc_options *ndopts)
@@ -486,7 +471,7 @@ void igmp6_event_report(struct sk_buff *skb);
#ifdef CONFIG_SYSCTL
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write,
+int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 0d28172193fa..a44f262a7384 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -412,12 +412,12 @@ void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *,
void *neigh_seq_next(struct seq_file *, void *, loff_t *);
void neigh_seq_stop(struct seq_file *, void *);
-int neigh_proc_dointvec(struct ctl_table *ctl, int write,
+int neigh_proc_dointvec(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
+int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write,
void *buffer,
size_t *lenp, loff_t *ppos);
-int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
+int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 958c805df191..da93873df4db 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -67,8 +67,6 @@ struct net {
*/
spinlock_t rules_mod_lock;
- atomic_t dev_unreg_count;
-
unsigned int dev_base_seq; /* protected by rtnl_mutex */
u32 ifindex;
@@ -369,21 +367,30 @@ static inline void put_net_track(struct net *net, netns_tracker *tracker)
typedef struct {
#ifdef CONFIG_NET_NS
- struct net *net;
+ struct net __rcu *net;
#endif
} possible_net_t;
static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
- pnet->net = net;
+ rcu_assign_pointer(pnet->net, net);
#endif
}
static inline struct net *read_pnet(const possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
- return pnet->net;
+ return rcu_dereference_protected(pnet->net, true);
+#else
+ return &init_net;
+#endif
+}
+
+static inline struct net *read_pnet_rcu(const possible_net_t *pnet)
+{
+#ifdef CONFIG_NET_NS
+ return rcu_dereference(pnet->net);
#else
return &init_net;
#endif
@@ -442,8 +449,11 @@ struct pernet_operations {
void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
- unsigned int *id;
- size_t size;
+ /* Following method is called with RTNL held. */
+ void (*exit_batch_rtnl)(struct list_head *net_exit_list,
+ struct list_head *dev_kill_list);
+ unsigned int * const id;
+ const size_t size;
};
/*
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 8b8ed4e13d74..173bcfcd868a 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -4,6 +4,131 @@
#include <linux/netdevice.h>
+/* See the netdev.yaml spec for definition of each statistic */
+struct netdev_queue_stats_rx {
+ u64 bytes;
+ u64 packets;
+ u64 alloc_fail;
+
+ u64 hw_drops;
+ u64 hw_drop_overruns;
+
+ u64 csum_unnecessary;
+ u64 csum_none;
+ u64 csum_bad;
+
+ u64 hw_gro_packets;
+ u64 hw_gro_bytes;
+ u64 hw_gro_wire_packets;
+ u64 hw_gro_wire_bytes;
+
+ u64 hw_drop_ratelimits;
+};
+
+struct netdev_queue_stats_tx {
+ u64 bytes;
+ u64 packets;
+
+ u64 hw_drops;
+ u64 hw_drop_errors;
+
+ u64 csum_none;
+ u64 needs_csum;
+
+ u64 hw_gso_packets;
+ u64 hw_gso_bytes;
+ u64 hw_gso_wire_packets;
+ u64 hw_gso_wire_bytes;
+
+ u64 hw_drop_ratelimits;
+
+ u64 stop;
+ u64 wake;
+};
+
+/**
+ * struct netdev_stat_ops - netdev ops for fine grained stats
+ * @get_queue_stats_rx: get stats for a given Rx queue
+ * @get_queue_stats_tx: get stats for a given Tx queue
+ * @get_base_stats: get base stats (not belonging to any live instance)
+ *
+ * Query stats for a given object. The values of the statistics are undefined
+ * on entry (specifically they are *not* zero-initialized). Drivers should
+ * assign values only to the statistics they collect. Statistics which are not
+ * collected must be left undefined.
+ *
+ * Queue objects are not necessarily persistent, and only currently active
+ * queues are queried by the per-queue callbacks. This means that per-queue
+ * statistics will not generally add up to the total number of events for
+ * the device. The @get_base_stats callback allows filling in the delta
+ * between events for currently live queues and overall device history.
+ * @get_base_stats can also be used to report any miscellaneous packets
+ * transferred outside of the main set of queues used by the networking stack.
+ * When the statistics for the entire device are queried, first @get_base_stats
+ * is issued to collect the delta, and then a series of per-queue callbacks.
+ * Only statistics which are set in @get_base_stats will be reported
+ * at the device level, meaning that unlike in queue callbacks, setting
+ * a statistic to zero in @get_base_stats is a legitimate thing to do.
+ * This is because @get_base_stats has a second function of designating which
+ * statistics are in fact correct for the entire device (e.g. when history
+ * for some of the events is not maintained, and reliable "total" cannot
+ * be provided).
+ *
+ * Device drivers can assume that when collecting total device stats,
+ * the @get_base_stats and subsequent per-queue calls are performed
+ * "atomically" (without releasing the rtnl_lock).
+ *
+ * Device drivers are encouraged to reset the per-queue statistics when
+ * number of queues change. This is because the primary use case for
+ * per-queue statistics is currently to detect traffic imbalance.
+ */
+struct netdev_stat_ops {
+ void (*get_queue_stats_rx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_rx *stats);
+ void (*get_queue_stats_tx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_tx *stats);
+ void (*get_base_stats)(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx);
+};
+
+void netdev_stat_queue_sum(struct net_device *netdev,
+ int rx_start, int rx_end,
+ struct netdev_queue_stats_rx *rx_sum,
+ int tx_start, int tx_end,
+ struct netdev_queue_stats_tx *tx_sum);
+
+/**
+ * struct netdev_queue_mgmt_ops - netdev ops for queue management
+ *
+ * @ndo_queue_mem_size: Size of the struct that describes a queue's memory.
+ *
+ * @ndo_queue_mem_alloc: Allocate memory for an RX queue at the specified index.
+ * The new memory is written at the specified address.
+ *
+ * @ndo_queue_mem_free: Free memory from an RX queue.
+ *
+ * @ndo_queue_start: Start an RX queue with the specified memory and at the
+ * specified index.
+ *
+ * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
+ * queue's memory is written at the specified address.
+ */
+struct netdev_queue_mgmt_ops {
+ size_t ndo_queue_mem_size;
+ int (*ndo_queue_mem_alloc)(struct net_device *dev,
+ void *per_queue_mem,
+ int idx);
+ void (*ndo_queue_mem_free)(struct net_device *dev,
+ void *per_queue_mem);
+ int (*ndo_queue_start)(struct net_device *dev,
+ void *per_queue_mem,
+ int idx);
+ int (*ndo_queue_stop)(struct net_device *dev,
+ void *per_queue_mem,
+ int idx);
+};
+
/**
* DOC: Lockless queue stopping / waking helpers.
*
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index cdcafb30d437..596836abf7bf 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -6,6 +6,7 @@
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <net/xdp.h>
+#include <net/page_pool/types.h>
/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
@@ -21,6 +22,11 @@ struct netdev_rx_queue {
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
+ /* NAPI instance for the queue
+ * Readers and writers must hold RTNL
+ */
+ struct napi_struct *napi;
+ struct pp_memory_provider_params mp_params;
} ____cacheline_aligned_in_smp;
/*
@@ -39,7 +45,6 @@ __netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
return dev->_rx + rxq;
}
-#ifdef CONFIG_SYSFS
static inline unsigned int
get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
{
@@ -49,5 +54,7 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
BUG_ON(index >= dev->num_rx_queues);
return index;
}
-#endif
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
+
#endif
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 4085765c3370..8cb70e7485e2 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -160,10 +160,6 @@ static inline struct net *nf_ct_net(const struct nf_conn *ct)
return read_pnet(&ct->ct_net);
}
-/* Alter reply tuple (maybe alter helper). */
-void nf_conntrack_alter_reply(struct nf_conn *ct,
- const struct nf_conntrack_tuple *newreply);
-
/* Is this tuple taken? (ignoring any belonging to the given
conntrack). */
int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
@@ -284,6 +280,16 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK;
}
+static inline void nf_conntrack_alter_reply(struct nf_conn *ct,
+ const struct nf_conntrack_tuple *newreply)
+{
+ /* Must be unconfirmed, so not in hash table yet */
+ if (WARN_ON(nf_ct_is_confirmed(ct)))
+ return;
+
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
+}
+
#define nfct_time_stamp ((u32)(jiffies))
/* jiffies until ct expires, 0 if already expired */
@@ -302,8 +308,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct)
/* use after obtaining a reference count */
static inline bool nf_ct_should_gc(const struct nf_conn *ct)
{
- return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
- !nf_ct_is_dying(ct);
+ if (!nf_ct_is_confirmed(ct))
+ return false;
+
+ /* load ct->timeout after is_confirmed() test.
+ * Pairs with __nf_conntrack_confirm() which:
+ * 1. Increases ct->timeout value
+ * 2. Inserts ct into rcu hlist
+ * 3. Sets the confirmed bit
+ * 4. Unlocks the hlist lock
+ */
+ smp_acquire__after_ctrl_dep();
+
+ return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct);
}
#define NF_CT_DAY (86400 * HZ)
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index e227d997fc71..1b58b5b91ff6 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -15,10 +15,8 @@ struct nf_conncount_list {
unsigned int count; /* length of list */
};
-struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
- unsigned int keylen);
-void nf_conncount_destroy(struct net *net, unsigned int family,
- struct nf_conncount_data *data);
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);
+void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data);
unsigned int nf_conncount_count(struct net *net,
struct nf_conncount_data *data,
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index fcb19a4e8f2b..6903f72bcc15 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -39,7 +39,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_LABELS
struct net *net = nf_ct_net(ct);
- if (net->ct.labels_used == 0)
+ if (atomic_read(&net->ct.labels_used) == 0)
return NULL;
return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC);
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index df7775afb92b..1a6fca013165 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -74,12 +74,13 @@ enum nf_flowtable_flags {
};
struct nf_flowtable {
- struct list_head list;
- struct rhashtable rhashtable;
- int priority;
+ unsigned int flags; /* readonly in datapath */
+ int priority; /* control path (padding hole) */
+ struct rhashtable rhashtable; /* datapath, read-mostly members come first */
+
+ struct list_head list; /* slowpath parts */
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
- unsigned int flags;
struct flow_block flow_block;
struct rw_semaphore flow_block_lock; /* Guards flow_block */
possible_net_t net;
@@ -304,11 +305,26 @@ struct flow_ports {
__be16 source, dest;
};
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
+int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd);
+
unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
+#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+extern int nf_flow_register_bpf(void);
+#else
+static inline int nf_flow_register_bpf(void)
+{
+ return 0;
+}
+#endif
+
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
MODULE_ALIAS("nf-flowtable-" __stringify(family))
@@ -353,7 +369,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
{
- if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN))
return false;
*inner_proto = __nf_flow_pppoe_proto(skb);
diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h
index 52e27920f829..cef7a4eb8f97 100644
--- a/include/net/netfilter/nf_hooks_lwtunnel.h
+++ b/include/net/netfilter/nf_hooks_lwtunnel.h
@@ -2,6 +2,6 @@
#include <linux/types.h>
#ifdef CONFIG_SYSCTL
-int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+int nf_hooks_lwtunnel_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index c81021ab07aa..4aeffddb7586 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -35,7 +35,6 @@ struct nf_queue_handler {
void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
-void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8321915dddb2..757abcb54d11 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -2,7 +2,7 @@
#ifndef _NET_NF_TABLES_H
#define _NET_NF_TABLES_H
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/list.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
@@ -209,6 +209,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
* @family: protocol family
* @level: depth of the chains
* @report: notify via unicast netlink message
+ * @reg_inited: bitmap of initialised registers
*/
struct nft_ctx {
struct net *net;
@@ -221,6 +222,7 @@ struct nft_ctx {
u8 family;
u8 level;
bool report;
+ DECLARE_BITMAP(reg_inited, NFT_REG32_NUM);
};
enum nft_data_desc_flags {
@@ -254,7 +256,8 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
-int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len);
+int nft_parse_register_load(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *sreg, u32 len);
int nft_parse_register_store(const struct nft_ctx *ctx,
const struct nlattr *attr, u8 *dreg,
const struct nft_data *data,
@@ -275,6 +278,9 @@ struct nft_userdata {
unsigned char data[];
};
+/* placeholder structure for opaque set element backend representation. */
+struct nft_elem_priv { };
+
/**
* struct nft_set_elem - generic representation of set elements
*
@@ -296,12 +302,19 @@ struct nft_set_elem {
u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
struct nft_data val;
} data;
- void *priv;
+ struct nft_elem_priv *priv;
};
+static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
+{
+ return (void *)priv;
+}
+
+
/**
* enum nft_iter_type - nftables set iterator type
*
+ * @NFT_ITER_UNSPEC: unspecified, to catch errors
* @NFT_ITER_READ: read-only iteration over set elements
* @NFT_ITER_UPDATE: iteration under mutex to update set element state
*/
@@ -321,7 +334,7 @@ struct nft_set_iter {
int (*fn)(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
};
/**
@@ -407,7 +420,7 @@ struct nft_expr_info;
int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla,
struct nft_expr_info *info);
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
+int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr, bool reset);
@@ -429,6 +442,9 @@ struct nft_set_ext;
* @remove: remove element from set
* @walk: iterate over all set elements
* @get: get set elements
+ * @ksize: kernel set size
+ * @usize: userspace set size
+ * @adjust_maxsize: delta to adjust maximum set size
* @commit: commit set elements
* @abort: abort set elements
* @privsize: function to return size of set private data
@@ -449,7 +465,8 @@ struct nft_set_ops {
const struct nft_set_ext **ext);
bool (*update)(struct nft_set *set,
const u32 *key,
- void *(*new)(struct nft_set *,
+ struct nft_elem_priv *
+ (*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *),
const struct nft_expr *expr,
@@ -461,27 +478,30 @@ struct nft_set_ops {
int (*insert)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext);
+ struct nft_elem_priv **priv);
void (*activate)(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem);
- void * (*deactivate)(const struct net *net,
+ struct nft_elem_priv *elem_priv);
+ struct nft_elem_priv * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
- bool (*flush)(const struct net *net,
+ void (*flush)(const struct net *net,
const struct nft_set *set,
- void *priv);
+ struct nft_elem_priv *priv);
void (*remove)(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
void (*walk)(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter);
- void * (*get)(const struct net *net,
+ struct nft_elem_priv * (*get)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
- void (*commit)(const struct nft_set *set);
+ u32 (*ksize)(u32 size);
+ u32 (*usize)(u32 size);
+ u32 (*adjust_maxsize)(const struct nft_set *set);
+ void (*commit)(struct nft_set *set);
void (*abort)(const struct nft_set *set);
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
@@ -673,9 +693,8 @@ void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
* @NFT_SET_EXT_DATA: mapping data
* @NFT_SET_EXT_FLAGS: element flags
* @NFT_SET_EXT_TIMEOUT: element timeout
- * @NFT_SET_EXT_EXPIRATION: element expiration time
* @NFT_SET_EXT_USERDATA: user data associated with the element
- * @NFT_SET_EXT_EXPRESSIONS: expressions assiciated with the element
+ * @NFT_SET_EXT_EXPRESSIONS: expressions associated with the element
* @NFT_SET_EXT_OBJREF: stateful object reference associated with element
* @NFT_SET_EXT_NUM: number of extension types
*/
@@ -685,7 +704,6 @@ enum nft_set_extensions {
NFT_SET_EXT_DATA,
NFT_SET_EXT_FLAGS,
NFT_SET_EXT_TIMEOUT,
- NFT_SET_EXT_EXPIRATION,
NFT_SET_EXT_USERDATA,
NFT_SET_EXT_EXPRESSIONS,
NFT_SET_EXT_OBJREF,
@@ -800,14 +818,14 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
}
-static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
-{
- return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
-}
+struct nft_timeout {
+ u64 timeout;
+ u64 expiration;
+};
-static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext)
+static inline struct nft_timeout *nft_set_ext_timeout(const struct nft_set_ext *ext)
{
- return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
+ return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
}
static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext)
@@ -820,16 +838,25 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex
return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS);
}
+static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext,
+ u64 tstamp)
+{
+ if (!nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) ||
+ READ_ONCE(nft_set_ext_timeout(ext)->timeout) == 0)
+ return false;
+
+ return time_after_eq64(tstamp, READ_ONCE(nft_set_ext_timeout(ext)->expiration));
+}
+
static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
{
- return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
- time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
+ return __nft_set_elem_expired(ext, get_jiffies_64());
}
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
- void *elem)
+ const struct nft_elem_priv *elem_priv)
{
- return elem + set->ops->elemsize;
+ return (void *)elem_priv + set->ops->elemsize;
}
static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext)
@@ -841,16 +868,19 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nlattr *attr);
-void *nft_set_elem_init(const struct nft_set *set,
- const struct nft_set_ext_tmpl *tmpl,
- const u32 *key, const u32 *key_end, const u32 *data,
- u64 timeout, u64 expiration, gfp_t gfp);
+struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *key_end,
+ const u32 *data,
+ u64 timeout, u64 expiration, gfp_t gfp);
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[]);
-void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
bool destroy_expr);
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem);
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv);
struct nft_expr_ops;
/**
@@ -924,7 +954,7 @@ struct nft_expr_ops {
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
int (*clone)(struct nft_expr *dst,
- const struct nft_expr *src);
+ const struct nft_expr *src, gfp_t gfp);
unsigned int size;
int (*init)(const struct nft_ctx *ctx,
@@ -943,8 +973,7 @@ struct nft_expr_ops {
const struct nft_expr *expr,
bool reset);
int (*validate)(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data);
+ const struct nft_expr *expr);
bool (*reduce)(struct nft_regs_track *track,
const struct nft_expr *expr);
bool (*gc)(struct net *net,
@@ -1108,7 +1137,7 @@ struct nft_chain {
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
@@ -1246,10 +1275,13 @@ static inline void nft_use_inc_restore(u32 *use)
* @hgenerator: handle generator state
* @handle: table handle
* @use: number of chain references to this table
+ * @family:address family
* @flags: table flag (see enum nft_table_flags)
* @genmask: generation mask
- * @afinfo: address family info
+ * @nlpid: netlink port ID
* @name: name of the table
+ * @udlen: length of the user data
+ * @udata: user data
* @validate_state: internal, set when transaction adds jumps
*/
struct nft_table {
@@ -1277,6 +1309,12 @@ static inline bool nft_table_has_owner(const struct nft_table *table)
return table->flags & NFT_TABLE_F_OWNER;
}
+static inline bool nft_table_is_orphan(const struct nft_table *table)
+{
+ return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) ==
+ NFT_TABLE_F_PERSIST;
+}
+
static inline bool nft_base_chain_netdev(int family, u32 hooknum)
{
return family == NFPROTO_NETDEV ||
@@ -1588,41 +1626,68 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
}
/**
- * struct nft_trans - nf_tables object update in transaction
+ * struct nft_trans - nf_tables object update in transaction
*
- * @list: used internally
- * @binding_list: list of objects with possible bindings
- * @msg_type: message type
- * @put_net: ctx->net needs to be put
- * @ctx: transaction context
- * @data: internal information related to the transaction
+ * @list: used internally
+ * @net: struct net
+ * @table: struct nft_table the object resides in
+ * @msg_type: message type
+ * @seq: netlink sequence number
+ * @flags: modifiers to new request
+ * @report: notify via unicast netlink message
+ * @put_net: net needs to be put
+ *
+ * This is the information common to all objects in the transaction,
+ * this must always be the first member of derived sub-types.
*/
struct nft_trans {
struct list_head list;
- struct list_head binding_list;
+ struct net *net;
+ struct nft_table *table;
int msg_type;
- bool put_net;
- struct nft_ctx ctx;
- char data[];
+ u32 seq;
+ u16 flags;
+ u8 report:1;
+ u8 put_net:1;
+};
+
+/**
+ * struct nft_trans_binding - nf_tables object with binding support in transaction
+ * @nft_trans: base structure, MUST be first member
+ * @binding_list: list of objects with possible bindings
+ *
+ * This is the base type used by objects that can be bound to a chain.
+ */
+struct nft_trans_binding {
+ struct nft_trans nft_trans;
+ struct list_head binding_list;
};
struct nft_trans_rule {
+ struct nft_trans nft_trans;
struct nft_rule *rule;
+ struct nft_chain *chain;
struct nft_flow_rule *flow;
u32 rule_id;
bool bound;
};
-#define nft_trans_rule(trans) \
- (((struct nft_trans_rule *)trans->data)->rule)
-#define nft_trans_flow_rule(trans) \
- (((struct nft_trans_rule *)trans->data)->flow)
-#define nft_trans_rule_id(trans) \
- (((struct nft_trans_rule *)trans->data)->rule_id)
-#define nft_trans_rule_bound(trans) \
- (((struct nft_trans_rule *)trans->data)->bound)
+#define nft_trans_container_rule(trans) \
+ container_of(trans, struct nft_trans_rule, nft_trans)
+#define nft_trans_rule(trans) \
+ nft_trans_container_rule(trans)->rule
+#define nft_trans_flow_rule(trans) \
+ nft_trans_container_rule(trans)->flow
+#define nft_trans_rule_id(trans) \
+ nft_trans_container_rule(trans)->rule_id
+#define nft_trans_rule_bound(trans) \
+ nft_trans_container_rule(trans)->bound
+#define nft_trans_rule_chain(trans) \
+ nft_trans_container_rule(trans)->chain
struct nft_trans_set {
+ struct nft_trans_binding nft_trans_binding;
+ struct list_head list_trans_newset;
struct nft_set *set;
u32 set_id;
u32 gc_int;
@@ -1632,100 +1697,131 @@ struct nft_trans_set {
u32 size;
};
-#define nft_trans_set(trans) \
- (((struct nft_trans_set *)trans->data)->set)
-#define nft_trans_set_id(trans) \
- (((struct nft_trans_set *)trans->data)->set_id)
-#define nft_trans_set_bound(trans) \
- (((struct nft_trans_set *)trans->data)->bound)
-#define nft_trans_set_update(trans) \
- (((struct nft_trans_set *)trans->data)->update)
-#define nft_trans_set_timeout(trans) \
- (((struct nft_trans_set *)trans->data)->timeout)
-#define nft_trans_set_gc_int(trans) \
- (((struct nft_trans_set *)trans->data)->gc_int)
-#define nft_trans_set_size(trans) \
- (((struct nft_trans_set *)trans->data)->size)
+#define nft_trans_container_set(t) \
+ container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans)
+#define nft_trans_set(trans) \
+ nft_trans_container_set(trans)->set
+#define nft_trans_set_id(trans) \
+ nft_trans_container_set(trans)->set_id
+#define nft_trans_set_bound(trans) \
+ nft_trans_container_set(trans)->bound
+#define nft_trans_set_update(trans) \
+ nft_trans_container_set(trans)->update
+#define nft_trans_set_timeout(trans) \
+ nft_trans_container_set(trans)->timeout
+#define nft_trans_set_gc_int(trans) \
+ nft_trans_container_set(trans)->gc_int
+#define nft_trans_set_size(trans) \
+ nft_trans_container_set(trans)->size
struct nft_trans_chain {
+ struct nft_trans_binding nft_trans_binding;
struct nft_chain *chain;
- bool update;
char *name;
struct nft_stats __percpu *stats;
u8 policy;
+ bool update;
bool bound;
u32 chain_id;
struct nft_base_chain *basechain;
struct list_head hook_list;
};
-#define nft_trans_chain(trans) \
- (((struct nft_trans_chain *)trans->data)->chain)
-#define nft_trans_chain_update(trans) \
- (((struct nft_trans_chain *)trans->data)->update)
-#define nft_trans_chain_name(trans) \
- (((struct nft_trans_chain *)trans->data)->name)
-#define nft_trans_chain_stats(trans) \
- (((struct nft_trans_chain *)trans->data)->stats)
-#define nft_trans_chain_policy(trans) \
- (((struct nft_trans_chain *)trans->data)->policy)
-#define nft_trans_chain_bound(trans) \
- (((struct nft_trans_chain *)trans->data)->bound)
-#define nft_trans_chain_id(trans) \
- (((struct nft_trans_chain *)trans->data)->chain_id)
-#define nft_trans_basechain(trans) \
- (((struct nft_trans_chain *)trans->data)->basechain)
-#define nft_trans_chain_hooks(trans) \
- (((struct nft_trans_chain *)trans->data)->hook_list)
+#define nft_trans_container_chain(t) \
+ container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans)
+#define nft_trans_chain(trans) \
+ nft_trans_container_chain(trans)->chain
+#define nft_trans_chain_update(trans) \
+ nft_trans_container_chain(trans)->update
+#define nft_trans_chain_name(trans) \
+ nft_trans_container_chain(trans)->name
+#define nft_trans_chain_stats(trans) \
+ nft_trans_container_chain(trans)->stats
+#define nft_trans_chain_policy(trans) \
+ nft_trans_container_chain(trans)->policy
+#define nft_trans_chain_bound(trans) \
+ nft_trans_container_chain(trans)->bound
+#define nft_trans_chain_id(trans) \
+ nft_trans_container_chain(trans)->chain_id
+#define nft_trans_basechain(trans) \
+ nft_trans_container_chain(trans)->basechain
+#define nft_trans_chain_hooks(trans) \
+ nft_trans_container_chain(trans)->hook_list
struct nft_trans_table {
+ struct nft_trans nft_trans;
bool update;
};
-#define nft_trans_table_update(trans) \
- (((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_container_table(trans) \
+ container_of(trans, struct nft_trans_table, nft_trans)
+#define nft_trans_table_update(trans) \
+ nft_trans_container_table(trans)->update
+
+enum nft_trans_elem_flags {
+ NFT_TRANS_UPD_TIMEOUT = (1 << 0),
+ NFT_TRANS_UPD_EXPIRATION = (1 << 1),
+};
struct nft_trans_elem {
+ struct nft_trans nft_trans;
struct nft_set *set;
- struct nft_set_elem elem;
+ struct nft_elem_priv *elem_priv;
+ u64 timeout;
+ u64 expiration;
+ u8 update_flags;
bool bound;
};
-#define nft_trans_elem_set(trans) \
- (((struct nft_trans_elem *)trans->data)->set)
-#define nft_trans_elem(trans) \
- (((struct nft_trans_elem *)trans->data)->elem)
-#define nft_trans_elem_set_bound(trans) \
- (((struct nft_trans_elem *)trans->data)->bound)
+#define nft_trans_container_elem(t) \
+ container_of(t, struct nft_trans_elem, nft_trans)
+#define nft_trans_elem_set(trans) \
+ nft_trans_container_elem(trans)->set
+#define nft_trans_elem_priv(trans) \
+ nft_trans_container_elem(trans)->elem_priv
+#define nft_trans_elem_update_flags(trans) \
+ nft_trans_container_elem(trans)->update_flags
+#define nft_trans_elem_timeout(trans) \
+ nft_trans_container_elem(trans)->timeout
+#define nft_trans_elem_expiration(trans) \
+ nft_trans_container_elem(trans)->expiration
+#define nft_trans_elem_set_bound(trans) \
+ nft_trans_container_elem(trans)->bound
struct nft_trans_obj {
+ struct nft_trans nft_trans;
struct nft_object *obj;
struct nft_object *newobj;
bool update;
};
-#define nft_trans_obj(trans) \
- (((struct nft_trans_obj *)trans->data)->obj)
-#define nft_trans_obj_newobj(trans) \
- (((struct nft_trans_obj *)trans->data)->newobj)
-#define nft_trans_obj_update(trans) \
- (((struct nft_trans_obj *)trans->data)->update)
+#define nft_trans_container_obj(t) \
+ container_of(t, struct nft_trans_obj, nft_trans)
+#define nft_trans_obj(trans) \
+ nft_trans_container_obj(trans)->obj
+#define nft_trans_obj_newobj(trans) \
+ nft_trans_container_obj(trans)->newobj
+#define nft_trans_obj_update(trans) \
+ nft_trans_container_obj(trans)->update
struct nft_trans_flowtable {
+ struct nft_trans nft_trans;
struct nft_flowtable *flowtable;
- bool update;
struct list_head hook_list;
u32 flags;
+ bool update;
};
-#define nft_trans_flowtable(trans) \
- (((struct nft_trans_flowtable *)trans->data)->flowtable)
-#define nft_trans_flowtable_update(trans) \
- (((struct nft_trans_flowtable *)trans->data)->update)
-#define nft_trans_flowtable_hooks(trans) \
- (((struct nft_trans_flowtable *)trans->data)->hook_list)
-#define nft_trans_flowtable_flags(trans) \
- (((struct nft_trans_flowtable *)trans->data)->flags)
+#define nft_trans_container_flowtable(t) \
+ container_of(t, struct nft_trans_flowtable, nft_trans)
+#define nft_trans_flowtable(trans) \
+ nft_trans_container_flowtable(trans)->flowtable
+#define nft_trans_flowtable_update(trans) \
+ nft_trans_container_flowtable(trans)->update
+#define nft_trans_flowtable_hooks(trans) \
+ nft_trans_container_flowtable(trans)->hook_list
+#define nft_trans_flowtable_flags(trans) \
+ nft_trans_container_flowtable(trans)->flags
#define NFT_TRANS_GC_BATCHCOUNT 256
@@ -1735,10 +1831,37 @@ struct nft_trans_gc {
struct nft_set *set;
u32 seq;
u16 count;
- void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT];
struct rcu_head rcu;
};
+static inline void nft_ctx_update(struct nft_ctx *ctx,
+ const struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWRULE:
+ case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
+ ctx->chain = nft_trans_rule_chain(trans);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
+ ctx->chain = nft_trans_chain(trans);
+ break;
+ default:
+ ctx->chain = NULL;
+ break;
+ }
+
+ ctx->net = trans->net;
+ ctx->table = trans->table;
+ ctx->family = trans->table->family;
+ ctx->report = trans->report;
+ ctx->flags = trans->flags;
+ ctx->seq = trans->seq;
+}
+
struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
unsigned int gc_seq, gfp_t gfp);
void nft_trans_gc_destroy(struct nft_trans_gc *trans);
@@ -1758,7 +1881,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc);
void nft_setelem_data_deactivate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1766,7 +1889,7 @@ void nft_chain_filter_fini(void);
void __init nft_chain_route_init(void);
void nft_chain_route_fini(void);
-void nf_tables_trans_destroy_flush_work(void);
+void nf_tables_trans_destroy_flush_work(struct net *net);
int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
__be64 nf_jiffies64_to_msecs(u64 input);
@@ -1780,14 +1903,18 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
struct nftables_pernet {
struct list_head tables;
struct list_head commit_list;
+ struct list_head destroy_list;
+ struct list_head commit_set_list;
struct list_head binding_list;
struct list_head module_list;
struct list_head notify_list;
struct mutex commit_mutex;
u64 table_handle;
+ u64 tstamp;
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
+ struct work_struct destroy_work;
};
extern unsigned int nf_tables_net_id;
@@ -1797,6 +1924,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net)
return net_generic(net, nf_tables_net_id);
}
+static inline u64 nft_net_tstamp(const struct net *net)
+{
+ return nft_pernet(net)->tstamp;
+}
+
#define __NFT_REDUCE_READONLY 1UL
#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 16855c2a03f8..03b6165756fc 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -93,7 +93,7 @@ extern const struct nft_set_type nft_set_bitmap_type;
extern const struct nft_set_type nft_set_pipapo_type;
extern const struct nft_set_type nft_set_pipapo_avx2_type;
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index faa108b1ba67..5adf6fda11e8 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -36,6 +36,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
/**
* nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
+ * @net: The network namespace.
* @skb: The skb being processed.
* @laddr: IPv4 address to redirect to or zero.
* @lport: TCP port to redirect to or zero.
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 167640b843ef..7370fba844ef 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -2,6 +2,7 @@
#ifndef _NFT_FIB_H_
#define _NFT_FIB_H_
+#include <net/l3mdev.h>
#include <net/netfilter/nf_tables.h>
struct nft_fib {
@@ -18,12 +19,39 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in)
return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
+static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sock *sk;
+
+ switch (nft_hook(pkt)) {
+ case NF_INET_PRE_ROUTING:
+ case NF_INET_INGRESS:
+ case NF_INET_LOCAL_IN:
+ break;
+ default:
+ return false;
+ }
+
+ sk = pkt->skb->sk;
+ if (sk && sk_fullsock(sk))
+ return sk->sk_rx_dst_ifindex == indev->ifindex;
+
+ return nft_fib_is_loopback(pkt->skb, indev);
+}
+
+static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt,
+ const struct net_device *iif)
+{
+ const struct net_device *dev = iif ? iif : pkt->skb->dev;
+
+ return l3mdev_master_ifindex_rcu(dev);
+}
+
int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset);
int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[]);
-int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data);
-
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr);
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt);
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index ba1238f12a48..d602263590fe 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -41,8 +41,7 @@ void nft_meta_set_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr);
int nft_meta_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data);
+ const struct nft_expr *expr);
bool nft_meta_get_reduce(struct nft_regs_track *track,
const struct nft_expr *expr);
diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
index 6d9ba62efd75..19060212988a 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -15,8 +15,7 @@ struct nft_reject {
extern const struct nla_policy nft_reject_policy[];
int nft_reject_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data);
+ const struct nft_expr *expr);
int nft_reject_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
diff --git a/include/net/netkit.h b/include/net/netkit.h
new file mode 100644
index 000000000000..9ec0163739f4
--- /dev/null
+++ b/include/net/netkit.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef __NET_NETKIT_H
+#define __NET_NETKIT_H
+
+#include <linux/bpf.h>
+
+#ifdef CONFIG_NETKIT
+int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
+int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
+INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev));
+#else
+static inline int netkit_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int netkit_link_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int netkit_prog_detach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int netkit_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
+
+static inline struct net_device *netkit_peer_dev(struct net_device *dev)
+{
+ return NULL;
+}
+#endif /* CONFIG_NETKIT */
+#endif /* __NET_NETKIT_H */
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 43ae50337685..529160f76cac 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -30,7 +30,7 @@ struct calipso_doi;
/*
* NetLabel - A management interface for maintaining network packet label
- * mapping tables for explicit packet labling protocols.
+ * mapping tables for explicit packet labeling protocols.
*
* Network protocols such as CIPSO and RIPSO require a label translation layer
* to convert the label on the packet into something meaningful on the host
@@ -145,15 +145,14 @@ struct netlbl_lsm_cache {
* processing.
*
*/
-#define NETLBL_CATMAP_MAPTYPE u64
#define NETLBL_CATMAP_MAPCNT 4
-#define NETLBL_CATMAP_MAPSIZE (sizeof(NETLBL_CATMAP_MAPTYPE) * 8)
+#define NETLBL_CATMAP_MAPSIZE (sizeof(u64) * 8)
#define NETLBL_CATMAP_SIZE (NETLBL_CATMAP_MAPSIZE * \
NETLBL_CATMAP_MAPCNT)
-#define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01
+#define NETLBL_CATMAP_BIT ((u64)0x01)
struct netlbl_lsm_catmap {
u32 startbit;
- NETLBL_CATMAP_MAPTYPE bitmap[NETLBL_CATMAP_MAPCNT];
+ u64 bitmap[NETLBL_CATMAP_MAPCNT];
struct netlbl_lsm_catmap *next;
};
@@ -275,15 +274,17 @@ struct netlbl_calipso_ops {
* on success, NULL on failure.
*
*/
-static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
+static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc_noprof(gfp_t flags)
{
struct netlbl_lsm_cache *cache;
- cache = kzalloc(sizeof(*cache), flags);
+ cache = kzalloc_noprof(sizeof(*cache), flags);
if (cache)
refcount_set(&cache->refcount, 1);
return cache;
}
+#define netlbl_secattr_cache_alloc(...) \
+ alloc_hooks(netlbl_secattr_cache_alloc_noprof(__VA_ARGS__))
/**
* netlbl_secattr_cache_free - Frees a netlbl_lsm_cache struct
@@ -312,10 +313,11 @@ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache)
* on failure.
*
*/
-static inline struct netlbl_lsm_catmap *netlbl_catmap_alloc(gfp_t flags)
+static inline struct netlbl_lsm_catmap *netlbl_catmap_alloc_noprof(gfp_t flags)
{
- return kzalloc(sizeof(struct netlbl_lsm_catmap), flags);
+ return kzalloc_noprof(sizeof(struct netlbl_lsm_catmap), flags);
}
+#define netlbl_catmap_alloc(...) alloc_hooks(netlbl_catmap_alloc_noprof(__VA_ARGS__))
/**
* netlbl_catmap_free - Free a LSM secattr catmap
@@ -377,10 +379,11 @@ static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr)
* pointer on success, or NULL on failure.
*
*/
-static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(gfp_t flags)
+static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc_noprof(gfp_t flags)
{
- return kzalloc(sizeof(struct netlbl_lsm_secattr), flags);
+ return kzalloc_noprof(sizeof(struct netlbl_lsm_secattr), flags);
}
+#define netlbl_secattr_alloc(...) alloc_hooks(netlbl_secattr_alloc_noprof(__VA_ARGS__))
/**
* netlbl_secattr_free - Frees a netlbl_lsm_secattr struct
@@ -471,7 +474,8 @@ void netlbl_bitmap_setbit(unsigned char *bitmap, u32 bit, u8 state);
int netlbl_enabled(void);
int netlbl_sock_setattr(struct sock *sk,
u16 family,
- const struct netlbl_lsm_secattr *secattr);
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked);
void netlbl_sock_delattr(struct sock *sk);
int netlbl_sock_getattr(struct sock *sk,
struct netlbl_lsm_secattr *secattr);
@@ -488,6 +492,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
u16 family,
struct netlbl_lsm_secattr *secattr);
void netlbl_skbuff_err(struct sk_buff *skb, u16 family, int error, int gateway);
+bool netlbl_sk_lock_check(struct sock *sk);
/*
* LSM label mapping cache operations
@@ -615,7 +620,8 @@ static inline int netlbl_enabled(void)
}
static inline int netlbl_sock_setattr(struct sock *sk,
u16 family,
- const struct netlbl_lsm_secattr *secattr)
+ const struct netlbl_lsm_secattr *secattr,
+ bool sk_locked)
{
return -ENOSYS;
}
@@ -674,6 +680,11 @@ static inline struct audit_buffer *netlbl_audit_start(int type,
{
return NULL;
}
+
+static inline bool netlbl_sk_lock_check(struct sock *sk)
+{
+ return true;
+}
#endif /* CONFIG_NETLABEL */
const struct netlbl_calipso_ops *
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 8a7cd1170e1f..db6af207287c 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -41,7 +41,8 @@
* nlmsg_get_pos() return current position in message
* nlmsg_trim() trim part of message
* nlmsg_cancel() cancel message construction
- * nlmsg_free() free a netlink message
+ * nlmsg_consume() free a netlink message (expected)
+ * nlmsg_free() free a netlink message (drop)
*
* Message Sending:
* nlmsg_multicast() multicast message to several groups
@@ -128,6 +129,8 @@
* nla_len(nla) length of attribute payload
*
* Attribute Payload Access for Basic Types:
+ * nla_get_uint(nla) get payload for a uint attribute
+ * nla_get_sint(nla) get payload for a sint attribute
* nla_get_u8(nla) get payload for a u8 attribute
* nla_get_u16(nla) get payload for a u16 attribute
* nla_get_u32(nla) get payload for a u32 attribute
@@ -155,7 +158,11 @@
* nla_parse() parse and validate stream of attrs
* nla_parse_nested() parse nested attributes
* nla_for_each_attr() loop over all attributes
+ * nla_for_each_attr_type() loop over all attributes with the
+ * given type
* nla_for_each_nested() loop over the nested attributes
+ * nla_for_each_nested_type() loop over the nested attributes with
+ * the given type
*=========================================================================
*/
@@ -183,6 +190,8 @@ enum {
NLA_REJECT,
NLA_BE16,
NLA_BE32,
+ NLA_SINT,
+ NLA_UINT,
__NLA_TYPE_MAX,
};
@@ -229,6 +238,7 @@ enum nla_policy_validation {
* nested header (or empty); len field is used if
* nested_policy is also used, for the max attr
* number in the nested policy.
+ * NLA_SINT, NLA_UINT,
* NLA_U8, NLA_U16,
* NLA_U32, NLA_U64,
* NLA_S8, NLA_S16,
@@ -260,12 +270,14 @@ enum nla_policy_validation {
* while an array has the nested attributes at another
* level down and the attribute types directly in the
* nesting don't matter.
+ * NLA_UINT,
* NLA_U8,
* NLA_U16,
* NLA_U32,
* NLA_U64,
* NLA_BE16,
* NLA_BE32,
+ * NLA_SINT,
* NLA_S8,
* NLA_S16,
* NLA_S32,
@@ -280,6 +292,7 @@ enum nla_policy_validation {
* or NLA_POLICY_FULL_RANGE_SIGNED() macros instead.
* Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and
* NLA_POLICY_RANGE() macros.
+ * NLA_UINT,
* NLA_U8,
* NLA_U16,
* NLA_U32,
@@ -288,6 +301,7 @@ enum nla_policy_validation {
* to a struct netlink_range_validation that indicates
* the min/max values.
* Use NLA_POLICY_FULL_RANGE().
+ * NLA_SINT,
* NLA_S8,
* NLA_S16,
* NLA_S32,
@@ -351,8 +365,8 @@ struct nla_policy {
const u32 mask;
const char *reject_message;
const struct nla_policy *nested_policy;
- struct netlink_range_validation *range;
- struct netlink_range_validation_signed *range_signed;
+ const struct netlink_range_validation *range;
+ const struct netlink_range_validation_signed *range_signed;
struct {
s16 min, max;
};
@@ -377,9 +391,11 @@ struct nla_policy {
#define __NLA_IS_UINT_TYPE(tp) \
(tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \
- tp == NLA_U64 || tp == NLA_BE16 || tp == NLA_BE32)
+ tp == NLA_U64 || tp == NLA_UINT || \
+ tp == NLA_BE16 || tp == NLA_BE32)
#define __NLA_IS_SINT_TYPE(tp) \
- (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
+ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64 || \
+ tp == NLA_SINT)
#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
#define NLA_ENSURE_UINT_TYPE(tp) \
@@ -811,7 +827,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen,
/**
* nlmsg_find_attr - find a specific attribute in a netlink message
* @nlh: netlink message header
- * @hdrlen: length of familiy specific header
+ * @hdrlen: length of family specific header
* @attrtype: type of attribute to look for
*
* Returns the first attribute which matches the specified type.
@@ -833,7 +849,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
*
* Validates all attributes in the specified attribute stream against the
* specified policy. Validation is done in liberal mode.
- * See documenation of struct nla_policy for more details.
+ * See documentation of struct nla_policy for more details.
*
* Returns 0 on success or a negative error code.
*/
@@ -856,7 +872,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len,
*
* Validates all attributes in the specified attribute stream against the
* specified policy. Validation is done in strict mode.
- * See documenation of struct nla_policy for more details.
+ * See documentation of struct nla_policy for more details.
*
* Returns 0 on success or a negative error code.
*/
@@ -871,7 +887,7 @@ static inline int nla_validate(const struct nlattr *head, int len, int maxtype,
/**
* nlmsg_validate_deprecated - validate a netlink message including attributes
* @nlh: netlinket message header
- * @hdrlen: length of familiy specific header
+ * @hdrlen: length of family specific header
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
* @extack: extended ACK report struct
@@ -917,7 +933,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
* nlmsg_for_each_attr - iterate over a stream of attributes
* @pos: loop counter, set to current attribute
* @nlh: netlink message header
- * @hdrlen: length of familiy specific header
+ * @hdrlen: length of family specific header
* @rem: initialized to len, holds bytes currently remaining in stream
*/
#define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \
@@ -1000,11 +1016,25 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
}
/**
+ * nlmsg_new_large - Allocate a new netlink message with non-contiguous
+ * physical memory
+ * @payload: size of the message payload
+ *
+ * The allocated skb is unable to have frag page for shinfo->frags*,
+ * as the NULL setting for skb->head in netlink_skb_destructor() will
+ * bypass most of the handling in skb_release_data()
+ */
+static inline struct sk_buff *nlmsg_new_large(size_t payload)
+{
+ return netlink_alloc_large_skb(nlmsg_total_size(payload), 0);
+}
+
+/**
* nlmsg_end - Finalize a netlink message
* @skb: socket buffer the message is stored in
* @nlh: netlink message header
*
- * Corrects the netlink message header to include the appeneded
+ * Corrects the netlink message header to include the appended
* attributes. Only necessary if attributes have been added to
* the message.
*/
@@ -1053,7 +1083,7 @@ static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
}
/**
- * nlmsg_free - free a netlink message
+ * nlmsg_free - drop a netlink message
* @skb: socket buffer of netlink message
*/
static inline void nlmsg_free(struct sk_buff *skb)
@@ -1062,21 +1092,38 @@ static inline void nlmsg_free(struct sk_buff *skb)
}
/**
- * nlmsg_multicast - multicast a netlink message
+ * nlmsg_consume - free a netlink message
+ * @skb: socket buffer of netlink message
+ */
+static inline void nlmsg_consume(struct sk_buff *skb)
+{
+ consume_skb(skb);
+}
+
+/**
+ * nlmsg_multicast_filtered - multicast a netlink message with filter function
* @sk: netlink socket to spread messages to
* @skb: netlink message as socket buffer
* @portid: own netlink portid to avoid sending to yourself
* @group: multicast group id
* @flags: allocation flags
+ * @filter: filter function
+ * @filter_data: filter function private data
+ *
+ * Return: 0 on success, negative error code for failure.
*/
-static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
- u32 portid, unsigned int group, gfp_t flags)
+static inline int nlmsg_multicast_filtered(struct sock *sk, struct sk_buff *skb,
+ u32 portid, unsigned int group,
+ gfp_t flags,
+ netlink_filter_fn filter,
+ void *filter_data)
{
int err;
NETLINK_CB(skb).dst_group = group;
- err = netlink_broadcast(sk, skb, portid, group, flags);
+ err = netlink_broadcast_filtered(sk, skb, portid, group, flags,
+ filter, filter_data);
if (err > 0)
err = 0;
@@ -1084,6 +1131,21 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
}
/**
+ * nlmsg_multicast - multicast a netlink message
+ * @sk: netlink socket to spread messages to
+ * @skb: netlink message as socket buffer
+ * @portid: own netlink portid to avoid sending to yourself
+ * @group: multicast group id
+ * @flags: allocation flags
+ */
+static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
+ u32 portid, unsigned int group, gfp_t flags)
+{
+ return nlmsg_multicast_filtered(sk, skb, portid, group, flags,
+ NULL, NULL);
+}
+
+/**
* nlmsg_unicast - unicast a netlink message
* @sk: netlink socket to spread message to
* @skb: netlink message as socket buffer
@@ -1189,7 +1251,7 @@ static inline void *nla_data(const struct nlattr *nla)
* nla_len - length of payload
* @nla: netlink attribute
*/
-static inline int nla_len(const struct nlattr *nla)
+static inline u16 nla_len(const struct nlattr *nla)
{
return nla->nla_len - NLA_HDRLEN;
}
@@ -1358,6 +1420,22 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
}
/**
+ * nla_put_uint - Add a variable-size unsigned int to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_uint(struct sk_buff *skb, int attrtype, u64 value)
+{
+ u64 tmp64 = value;
+ u32 tmp32 = value;
+
+ if (tmp64 == tmp32)
+ return nla_put_u32(skb, attrtype, tmp32);
+ return nla_put(skb, attrtype, sizeof(u64), &tmp64);
+}
+
+/**
* nla_put_be32 - Add a __be32 netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
@@ -1512,6 +1590,22 @@ static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value,
}
/**
+ * nla_put_sint - Add a variable-size signed int to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_sint(struct sk_buff *skb, int attrtype, s64 value)
+{
+ s64 tmp64 = value;
+ s32 tmp32 = value;
+
+ if (tmp64 == tmp32)
+ return nla_put_s32(skb, attrtype, tmp32);
+ return nla_put(skb, attrtype, sizeof(s64), &tmp64);
+}
+
+/**
* nla_put_string - Add a string netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
@@ -1668,6 +1762,17 @@ static inline u64 nla_get_u64(const struct nlattr *nla)
}
/**
+ * nla_get_uint - return payload of uint attribute
+ * @nla: uint netlink attribute
+ */
+static inline u64 nla_get_uint(const struct nlattr *nla)
+{
+ if (nla_len(nla) == sizeof(u32))
+ return nla_get_u32(nla);
+ return nla_get_u64(nla);
+}
+
+/**
* nla_get_be64 - return payload of __be64 attribute
* @nla: __be64 netlink attribute
*/
@@ -1730,6 +1835,17 @@ static inline s64 nla_get_s64(const struct nlattr *nla)
}
/**
+ * nla_get_sint - return payload of uint attribute
+ * @nla: uint netlink attribute
+ */
+static inline s64 nla_get_sint(const struct nlattr *nla)
+{
+ if (nla_len(nla) == sizeof(s32))
+ return nla_get_s32(nla);
+ return nla_get_s64(nla);
+}
+
+/**
* nla_get_flag - return payload of flag attribute
* @nla: flag netlink attribute
*/
@@ -1789,10 +1905,11 @@ static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla)
* @src: netlink attribute to duplicate from
* @gfp: GFP mask
*/
-static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp)
+static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp)
{
- return kmemdup(nla_data(src), nla_len(src), gfp);
+ return kmemdup_noprof(nla_data(src), nla_len(src), gfp);
}
+#define nla_memdup(...) alloc_hooks(nla_memdup_noprof(__VA_ARGS__))
/**
* nla_nest_start_noflag - Start a new level of nested attributes
@@ -1837,7 +1954,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
* @start: container attribute
*
* Corrects the container attribute header to include the all
- * appeneded attributes.
+ * appended attributes.
*
* Returns the total data length of the skb.
*/
@@ -1870,7 +1987,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
*
* Validates all attributes in the nested attribute stream against the
* specified policy. Attributes with a type exceeding maxtype will be
- * ignored. See documenation of struct nla_policy for more details.
+ * ignored. See documentation of struct nla_policy for more details.
*
* Returns 0 on success or a negative error code.
*/
@@ -1969,6 +2086,18 @@ static inline int nla_total_size_64bit(int payload)
pos = nla_next(pos, &(rem)))
/**
+ * nla_for_each_attr_type - iterate over a stream of attributes
+ * @pos: loop counter, set to current attribute
+ * @type: required attribute type for @pos
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_attr_type(pos, type, head, len, rem) \
+ nla_for_each_attr(pos, head, len, rem) \
+ if (nla_type(pos) == type)
+
+/**
* nla_for_each_nested - iterate over nested attributes
* @pos: loop counter, set to current attribute
* @nla: attribute containing the nested attributes
@@ -1978,6 +2107,17 @@ static inline int nla_total_size_64bit(int payload)
nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem)
/**
+ * nla_for_each_nested_type - iterate over nested attributes
+ * @pos: loop counter, set to current attribute
+ * @type: required attribute type for @pos
+ * @nla: attribute containing the nested attributes
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_nested_type(pos, type, nla, rem) \
+ nla_for_each_nested(pos, nla, rem) \
+ if (nla_type(pos) == type)
+
+/**
* nla_is_last - Test if attribute is last in stream
* @nla: attribute to test
* @rem: bytes remaining in stream
diff --git a/include/net/netmem.h b/include/net/netmem.h
new file mode 100644
index 000000000000..8a6e20be4b9d
--- /dev/null
+++ b/include/net/netmem.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Network memory
+ *
+ * Author: Mina Almasry <almasrymina@google.com>
+ */
+
+#ifndef _NET_NETMEM_H
+#define _NET_NETMEM_H
+
+#include <linux/mm.h>
+#include <net/net_debug.h>
+
+/* net_iov */
+
+DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
+/* We overload the LSB of the struct page pointer to indicate whether it's
+ * a page or net_iov.
+ */
+#define NET_IOV 0x01UL
+
+struct net_iov {
+ unsigned long __unused_padding;
+ unsigned long pp_magic;
+ struct page_pool *pp;
+ struct dmabuf_genpool_chunk_owner *owner;
+ unsigned long dma_addr;
+ atomic_long_t pp_ref_count;
+};
+
+/* These fields in struct page are used by the page_pool and net stack:
+ *
+ * struct {
+ * unsigned long pp_magic;
+ * struct page_pool *pp;
+ * unsigned long _pp_mapping_pad;
+ * unsigned long dma_addr;
+ * atomic_long_t pp_ref_count;
+ * };
+ *
+ * We mirror the page_pool fields here so the page_pool can access these fields
+ * without worrying whether the underlying fields belong to a page or net_iov.
+ *
+ * The non-net stack fields of struct page are private to the mm stack and must
+ * never be mirrored to net_iov.
+ */
+#define NET_IOV_ASSERT_OFFSET(pg, iov) \
+ static_assert(offsetof(struct page, pg) == \
+ offsetof(struct net_iov, iov))
+NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
+NET_IOV_ASSERT_OFFSET(pp, pp);
+NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
+NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
+#undef NET_IOV_ASSERT_OFFSET
+
+/* netmem */
+
+/**
+ * typedef netmem_ref - a nonexistent type marking a reference to generic
+ * network memory.
+ *
+ * A netmem_ref currently is always a reference to a struct page. This
+ * abstraction is introduced so support for new memory types can be added.
+ *
+ * Use the supplied helpers to obtain the underlying memory pointer and fields.
+ */
+typedef unsigned long __bitwise netmem_ref;
+
+static inline bool netmem_is_net_iov(const netmem_ref netmem)
+{
+ return (__force unsigned long)netmem & NET_IOV;
+}
+
+/* This conversion fails (returns NULL) if the netmem_ref is not struct page
+ * backed.
+ */
+static inline struct page *netmem_to_page(netmem_ref netmem)
+{
+ if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+ return NULL;
+
+ return (__force struct page *)netmem;
+}
+
+static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return (struct net_iov *)((__force unsigned long)netmem &
+ ~NET_IOV);
+
+ DEBUG_NET_WARN_ON_ONCE(true);
+ return NULL;
+}
+
+static inline netmem_ref net_iov_to_netmem(struct net_iov *niov)
+{
+ return (__force netmem_ref)((unsigned long)niov | NET_IOV);
+}
+
+static inline netmem_ref page_to_netmem(struct page *page)
+{
+ return (__force netmem_ref)page;
+}
+
+static inline int netmem_ref_count(netmem_ref netmem)
+{
+ /* The non-pp refcount of net_iov is always 1. On net_iov, we only
+ * support pp refcounting which uses the pp_ref_count field.
+ */
+ if (netmem_is_net_iov(netmem))
+ return 1;
+
+ return page_ref_count(netmem_to_page(netmem));
+}
+
+static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return 0;
+
+ return page_to_pfn(netmem_to_page(netmem));
+}
+
+static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
+{
+ return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
+}
+
+static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->pp;
+}
+
+static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
+{
+ return &__netmem_clear_lsb(netmem)->pp_ref_count;
+}
+
+static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
+{
+ /* NUMA node preference only makes sense if we're allocating
+ * system memory. Memory providers (which give us net_iovs)
+ * choose for us.
+ */
+ if (netmem_is_net_iov(netmem))
+ return true;
+
+ return page_to_nid(netmem_to_page(netmem)) == pref_nid;
+}
+
+static inline netmem_ref netmem_compound_head(netmem_ref netmem)
+{
+ /* niov are never compounded */
+ if (netmem_is_net_iov(netmem))
+ return netmem;
+
+ return page_to_netmem(compound_head(netmem_to_page(netmem)));
+}
+
+static inline void *netmem_address(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return NULL;
+
+ return page_address(netmem_to_page(netmem));
+}
+
+static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->dma_addr;
+}
+
+#endif /* _NET_NETMEM_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 1f463b3957c7..bae914815aa3 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -107,7 +107,7 @@ struct netns_ct {
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
- unsigned int labels_used;
+ atomic_t labels_used;
#endif
};
#endif
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index a91ef9f8de60..78214f1b43a2 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -13,6 +13,7 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ int sysctl_optmem_max;
u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7a41c4791536..276f622f3516 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -19,8 +19,7 @@ struct hlist_head;
struct fib_table;
struct sock;
struct local_ports {
- seqlock_t lock;
- int range[2];
+ u32 range; /* high << 16 | low */
bool warned;
};
@@ -41,7 +40,46 @@ struct inet_timewait_death_row {
struct tcp_fastopen_context;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+struct sysctl_fib_multipath_hash_seed {
+ u32 user_seed;
+ u32 mp_seed;
+};
+#endif
+
struct netns_ipv4 {
+ /* Cacheline organization can be found documented in
+ * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
+ * Please update the document when adding new fields.
+ */
+
+ /* TX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_tx);
+ u8 sysctl_tcp_early_retrans;
+ u8 sysctl_tcp_tso_win_divisor;
+ u8 sysctl_tcp_tso_rtt_log;
+ u8 sysctl_tcp_autocorking;
+ int sysctl_tcp_min_snd_mss;
+ unsigned int sysctl_tcp_notsent_lowat;
+ int sysctl_tcp_limit_output_bytes;
+ int sysctl_tcp_min_rtt_wlen;
+ int sysctl_tcp_wmem[3];
+ u8 sysctl_ip_fwd_use_pmtu;
+ __cacheline_group_end(netns_ipv4_read_tx);
+
+ /* TXRX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_txrx);
+ u8 sysctl_tcp_moderate_rcvbuf;
+ __cacheline_group_end(netns_ipv4_read_txrx);
+
+ /* RX readonly hotpath cache line */
+ __cacheline_group_begin(netns_ipv4_read_rx);
+ u8 sysctl_ip_early_demux;
+ u8 sysctl_tcp_early_demux;
+ int sysctl_tcp_reordering;
+ int sysctl_tcp_rmem[3];
+ __cacheline_group_end(netns_ipv4_read_rx);
+
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;
@@ -84,7 +122,10 @@ struct netns_ipv4 {
u8 sysctl_icmp_errors_use_inbound_ifaddr;
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
-
+ int sysctl_icmp_msgs_per_sec;
+ int sysctl_icmp_msgs_burst;
+ atomic_t icmp_global_credit;
+ u32 icmp_global_stamp;
u32 ip_rt_min_pmtu;
int ip_rt_mtu_expires;
int ip_rt_min_advmss;
@@ -96,17 +137,14 @@ struct netns_ipv4 {
u8 sysctl_ip_default_ttl;
u8 sysctl_ip_no_pmtu_disc;
- u8 sysctl_ip_fwd_use_pmtu;
u8 sysctl_ip_fwd_update_priority;
u8 sysctl_ip_nonlocal_bind;
u8 sysctl_ip_autobind_reuse;
/* Shall we try to damage output packets if routing dev changes? */
u8 sysctl_ip_dynaddr;
- u8 sysctl_ip_early_demux;
#ifdef CONFIG_NET_L3_MASTER_DEV
u8 sysctl_raw_l3mdev_accept;
#endif
- u8 sysctl_tcp_early_demux;
u8 sysctl_udp_early_demux;
u8 sysctl_nexthop_compat_mode;
@@ -119,7 +157,6 @@ struct netns_ipv4 {
u8 sysctl_tcp_mtu_probing;
int sysctl_tcp_mtu_probe_floor;
int sysctl_tcp_base_mss;
- int sysctl_tcp_min_snd_mss;
int sysctl_tcp_probe_threshold;
u32 sysctl_tcp_probe_interval;
@@ -132,17 +169,18 @@ struct netns_ipv4 {
u8 sysctl_tcp_syncookies;
u8 sysctl_tcp_migrate_req;
u8 sysctl_tcp_comp_sack_nr;
- int sysctl_tcp_reordering;
+ u8 sysctl_tcp_backlog_ack_defer;
+ u8 sysctl_tcp_pingpong_thresh;
+
u8 sysctl_tcp_retries1;
u8 sysctl_tcp_retries2;
u8 sysctl_tcp_orphan_retries;
u8 sysctl_tcp_tw_reuse;
int sysctl_tcp_fin_timeout;
- unsigned int sysctl_tcp_notsent_lowat;
u8 sysctl_tcp_sack;
u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps;
- u8 sysctl_tcp_early_retrans;
+ int sysctl_tcp_rto_min_us;
u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle;
@@ -158,21 +196,13 @@ struct netns_ipv4 {
u8 sysctl_tcp_frto;
u8 sysctl_tcp_nometrics_save;
u8 sysctl_tcp_no_ssthresh_metrics_save;
- u8 sysctl_tcp_moderate_rcvbuf;
- u8 sysctl_tcp_tso_win_divisor;
u8 sysctl_tcp_workaround_signed_windows;
- int sysctl_tcp_limit_output_bytes;
int sysctl_tcp_challenge_ack_limit;
- int sysctl_tcp_min_rtt_wlen;
u8 sysctl_tcp_min_tso_segs;
- u8 sysctl_tcp_tso_rtt_log;
- u8 sysctl_tcp_autocorking;
u8 sysctl_tcp_reflect_tos;
int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
- int sysctl_tcp_wmem[3];
- int sysctl_tcp_rmem[3];
unsigned int sysctl_tcp_child_ehash_entries;
unsigned long sysctl_tcp_comp_sack_delay_ns;
unsigned long sysctl_tcp_comp_sack_slack_ns;
@@ -226,6 +256,7 @@ struct netns_ipv4 {
#endif
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed;
u32 sysctl_fib_multipath_hash_fields;
u8 sysctl_fib_multipath_use_neigh;
u8 sysctl_fib_multipath_hash_policy;
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index 7eff3d981b89..d25cd7a9c5ff 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -125,14 +125,14 @@ struct netns_sctp {
int pf_expose;
/*
- * Policy for preforming sctp/socket accounting
+ * Policy for performing sctp/socket accounting
* 0 - do socket level accounting, all assocs share sk_sndbuf
* 1 - do sctp accounting, each asoc may use sk_sndbuf bytes
*/
int sndbuf_policy;
/*
- * Policy for preforming sctp/socket accounting
+ * Policy for performing sctp/socket accounting
* 0 - do socket level accounting, all assocs share sk_rcvbuf
* 1 - do sctp accounting, each asoc may use sk_rcvbuf bytes
*/
diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
index 582212ada3ba..fc752a50f91b 100644
--- a/include/net/netns/smc.h
+++ b/include/net/netns/smc.h
@@ -22,5 +22,7 @@ struct netns_smc {
int sysctl_smcr_testlink_time;
int sysctl_wmem;
int sysctl_rmem;
+ int sysctl_max_links_per_lgr;
+ int sysctl_max_conns_per_lgr;
};
#endif
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 423b52eca908..23dd647fe024 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -43,6 +43,7 @@ struct netns_xfrm {
struct hlist_head __rcu *state_bysrc;
struct hlist_head __rcu *state_byspi;
struct hlist_head __rcu *state_byseq;
+ struct hlist_head __percpu *state_cache_input;
unsigned int state_hmask;
unsigned int state_num;
struct work_struct state_hash_work;
@@ -51,7 +52,6 @@ struct netns_xfrm {
struct hlist_head *policy_byidx;
unsigned int policy_idx_hmask;
unsigned int idx_generator;
- struct hlist_head policy_inexact[XFRM_POLICY_MAX];
struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX];
unsigned int policy_count[XFRM_POLICY_MAX * 2];
struct work_struct policy_hash_work;
@@ -83,6 +83,7 @@ struct netns_xfrm {
spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
+ struct delayed_work nat_keepalive_work;
};
#endif
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 2b12725de9c0..d9fb44e8b321 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -47,6 +47,8 @@ struct nh_config {
bool nh_grp_res_has_idle_timer;
bool nh_grp_res_has_unbalanced_timer;
+ bool nh_hw_stats;
+
struct nlattr *nh_encap;
u16 nh_encap_type;
@@ -92,12 +94,18 @@ struct nh_res_table {
u32 unbalanced_timer;
u16 num_nh_buckets;
- struct nh_res_bucket nh_buckets[];
+ struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
+};
+
+struct nh_grp_entry_stats {
+ u64_stats_t packets;
+ struct u64_stats_sync syncp;
};
struct nh_grp_entry {
struct nexthop *nh;
- u8 weight;
+ struct nh_grp_entry_stats __percpu *stats;
+ u16 weight;
union {
struct {
@@ -114,6 +122,7 @@ struct nh_grp_entry {
struct list_head nh_list;
struct nexthop *nh_parent; /* nexthop of group with this entry */
+ u64 packets_hw;
};
struct nh_group {
@@ -124,9 +133,10 @@ struct nh_group {
bool resilient;
bool fdb_nh;
bool has_v4;
+ bool hw_stats;
struct nh_res_table __rcu *res_table;
- struct nh_grp_entry nh_entries[];
+ struct nh_grp_entry nh_entries[] __counted_by(num_nh);
};
struct nexthop {
@@ -157,6 +167,7 @@ enum nexthop_event_type {
NEXTHOP_EVENT_REPLACE,
NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
NEXTHOP_EVENT_BUCKET_REPLACE,
+ NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
enum nh_notifier_info_type {
@@ -164,6 +175,7 @@ enum nh_notifier_info_type {
NH_NOTIFIER_INFO_TYPE_GRP,
NH_NOTIFIER_INFO_TYPE_RES_TABLE,
NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
+ NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
struct nh_notifier_single_info {
@@ -173,21 +185,22 @@ struct nh_notifier_single_info {
__be32 ipv4;
struct in6_addr ipv6;
};
+ u32 id;
u8 is_reject:1,
is_fdb:1,
has_encap:1;
};
struct nh_notifier_grp_entry_info {
- u8 weight;
- u32 id;
+ u16 weight;
struct nh_notifier_single_info nh;
};
struct nh_notifier_grp_info {
u16 num_nh;
bool is_fdb;
- struct nh_notifier_grp_entry_info nh_entries[];
+ bool hw_stats;
+ struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};
struct nh_notifier_res_bucket_info {
@@ -200,7 +213,19 @@ struct nh_notifier_res_bucket_info {
struct nh_notifier_res_table_info {
u16 num_nh_buckets;
- struct nh_notifier_single_info nhs[];
+ bool hw_stats;
+ struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
+};
+
+struct nh_notifier_grp_hw_stats_entry_info {
+ u32 id;
+ u64 packets;
+};
+
+struct nh_notifier_grp_hw_stats_info {
+ u16 num_nh;
+ bool hw_stats_used;
+ struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
};
struct nh_notifier_info {
@@ -213,17 +238,22 @@ struct nh_notifier_info {
struct nh_notifier_grp_info *nh_grp;
struct nh_notifier_res_table_info *nh_res_table;
struct nh_notifier_res_bucket_info *nh_res_bucket;
+ struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
};
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
bool offload, bool trap);
void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
unsigned long *activity);
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+ unsigned int nh_idx,
+ u64 delta_packets);
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
@@ -237,7 +267,7 @@ static inline bool nexthop_get(struct nexthop *nh)
static inline void nexthop_put(struct nexthop *nh)
{
if (refcount_dec_and_test(&nh->refcnt))
- call_rcu(&nh->rcu, nexthop_free_rcu);
+ call_rcu_hurry(&nh->rcu, nexthop_free_rcu);
}
static inline bool nexthop_cmp(const struct nexthop *nh1,
@@ -316,7 +346,7 @@ static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
u8 rt_family)
{
- struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
int i;
for (i = 0; i < nhg->num_nh; i++) {
diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h
index e82f55f543bb..dc36519d16aa 100644
--- a/include/net/nfc/nci.h
+++ b/include/net/nfc/nci.h
@@ -332,7 +332,7 @@ struct nci_core_init_rsp_1 {
__le32 nfcc_features;
__u8 num_supported_rf_interfaces;
__u8 supported_rf_interfaces[]; /* variable size array */
- /* continuted in nci_core_init_rsp_2 */
+ /* continued in nci_core_init_rsp_2 */
} __packed;
struct nci_core_init_rsp_2 {
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 5dee575fbe86..3a3781838c67 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -80,7 +80,7 @@ struct nfc_ops {
#define NFC_ATR_REQ_GT_OFFSET 14
/**
- * struct nfc_target - NFC target descriptiom
+ * struct nfc_target - NFC target description
*
* @sens_res: 2 bytes describing the target SENS_RES response, if the target
* is a type A one. The %sens_res most significant byte must be byte 2
@@ -196,7 +196,7 @@ struct nfc_dev {
};
#define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
-extern struct class nfc_class;
+extern const struct class nfc_class;
struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
@@ -230,10 +230,10 @@ static inline void nfc_set_parent_dev(struct nfc_dev *nfc_dev,
}
/**
- * nfc_set_drvdata - set driver specifc data
+ * nfc_set_drvdata - set driver specific data
*
* @dev: The nfc device
- * @data: Pointer to driver specifc data
+ * @data: Pointer to driver specific data
*/
static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data)
{
@@ -241,7 +241,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data)
}
/**
- * nfc_get_drvdata - get driver specifc data
+ * nfc_get_drvdata - get driver specific data
*
* @dev: The nfc device
*/
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index 8cd9d141f5af..a994dea74596 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -78,6 +78,10 @@ enum nl802154_commands {
NL802154_CMD_SCAN_DONE,
NL802154_CMD_SEND_BEACONS,
NL802154_CMD_STOP_BEACONS,
+ NL802154_CMD_ASSOCIATE,
+ NL802154_CMD_DISASSOCIATE,
+ NL802154_CMD_SET_MAX_ASSOCIATIONS,
+ NL802154_CMD_LIST_ASSOCIATIONS,
/* add new commands above here */
@@ -147,6 +151,8 @@ enum nl802154_attrs {
NL802154_ATTR_SCAN_DURATION,
NL802154_ATTR_SCAN_DONE_REASON,
NL802154_ATTR_BEACON_INTERVAL,
+ NL802154_ATTR_MAX_ASSOCIATIONS,
+ NL802154_ATTR_PEER,
/* add attributes here, update the policy in nl802154.c */
@@ -186,7 +192,7 @@ enum nl802154_iftype {
* @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for
* nl802154_wpan_phy_tx_power
* @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level
- * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maxmimum value for cca_ed_level
+ * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maximum value for cca_ed_level
* @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags
* @NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags
* @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value
@@ -385,8 +391,6 @@ enum nl802154_supported_bool_states {
NL802154_SUPPORTED_BOOL_MAX = __NL802154_SUPPORTED_BOOL_AFTER_LAST - 1
};
-#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
-
enum nl802154_dev_addr_modes {
NL802154_DEV_ADDR_NONE,
__NL802154_DEV_ADDR_INVALID,
@@ -406,12 +410,26 @@ enum nl802154_dev_addr_attrs {
NL802154_DEV_ADDR_ATTR_SHORT,
NL802154_DEV_ADDR_ATTR_EXTENDED,
NL802154_DEV_ADDR_ATTR_PAD,
+ NL802154_DEV_ADDR_ATTR_PEER_TYPE,
/* keep last */
__NL802154_DEV_ADDR_ATTR_AFTER_LAST,
NL802154_DEV_ADDR_ATTR_MAX = __NL802154_DEV_ADDR_ATTR_AFTER_LAST - 1
};
+enum nl802154_peer_type {
+ NL802154_PEER_TYPE_UNSPEC,
+
+ NL802154_PEER_TYPE_PARENT,
+ NL802154_PEER_TYPE_CHILD,
+
+ /* keep last */
+ __NL802154_PEER_TYPE_AFTER_LAST,
+ NL802154_PEER_TYPE_MAX = __NL802154_PEER_TYPE_AFTER_LAST - 1
+};
+
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+
enum nl802154_key_id_modes {
NL802154_KEY_ID_MODE_IMPLICIT,
NL802154_KEY_ID_MODE_INDEX,
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 8e7751464ff5..60a5347922be 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -8,40 +8,63 @@
/**
* DOC: page_pool allocator
*
- * The page_pool allocator is optimized for the XDP mode that
- * uses one frame per-page, but it can fallback on the
- * regular page allocator APIs.
- *
- * Basic use involves replacing alloc_pages() calls with the
- * page_pool_alloc_pages() call. Drivers should use
- * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
- *
- * The API keeps track of in-flight pages, in order to let API users know
- * when it is safe to free a page_pool object. Thus, API users
- * must call page_pool_put_page() to free the page, or attach
- * the page to a page_pool-aware object like skbs marked with
+ * The page_pool allocator is optimized for recycling page or page fragment used
+ * by skb packet and xdp frame.
+ *
+ * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(),
+ * which allocate memory with or without page splitting depending on the
+ * requested memory size.
+ *
+ * If the driver knows that it always requires full pages or its allocations are
+ * always smaller than half a page, it can use one of the more specific API
+ * calls:
+ *
+ * 1. page_pool_alloc_pages(): allocate memory without page splitting when
+ * driver knows that the memory it need is always bigger than half of the page
+ * allocated from page pool. There is no cache line dirtying for 'struct page'
+ * when a page is recycled back to the page pool.
+ *
+ * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver
+ * knows that the memory it need is always smaller than or equal to half of the
+ * page allocated from page pool. Page splitting enables memory saving and thus
+ * avoids TLB/cache miss for data access, but there also is some cost to
+ * implement page splitting, mainly some cache line dirtying/bouncing for
+ * 'struct page' and atomic operation for page->pp_ref_count.
+ *
+ * The API keeps track of in-flight pages, in order to let API users know when
+ * it is safe to free a page_pool object, the API users must call
+ * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or
+ * attach the page_pool object to a page_pool-aware object like skbs marked with
* skb_mark_for_recycle().
*
- * API users must call page_pool_put_page() once on a page, as it
- * will either recycle the page, or in case of refcnt > 1, it will
- * release the DMA mapping and in-flight state accounting.
+ * page_pool_put_page() may be called multiple times on the same page if a page
+ * is split into multiple fragments. For the last fragment, it will either
+ * recycle the page, or in case of page->_refcount > 1, it will release the DMA
+ * mapping and in-flight state accounting.
+ *
+ * dma_sync_single_range_for_device() is only called for the last fragment when
+ * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
+ * last freed fragment to do the sync_for_device operation for all fragments in
+ * the same page when a page is split. The API user must setup pool->p.max_len
+ * and pool->p.offset correctly and ensure that page_pool_put_page() is called
+ * with dma_sync_size being -1 for fragment API.
*/
#ifndef _NET_PAGE_POOL_HELPERS_H
#define _NET_PAGE_POOL_HELPERS_H
+#include <linux/dma-mapping.h>
+
#include <net/page_pool/types.h>
+#include <net/net_debug.h>
+#include <net/netmem.h>
#ifdef CONFIG_PAGE_POOL_STATS
+/* Deprecated driver-facing API, use netlink instead */
int page_pool_ethtool_stats_get_count(void);
u8 *page_pool_ethtool_stats_get_strings(u8 *data);
-u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
+u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats);
-/*
- * Drivers that wish to harvest page pool stats and report them to users
- * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
- * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
- */
-bool page_pool_get_stats(struct page_pool *pool,
+bool page_pool_get_stats(const struct page_pool *pool,
struct page_pool_stats *stats);
#else
static inline int page_pool_ethtool_stats_get_count(void)
@@ -54,7 +77,7 @@ static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
return data;
}
-static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+static inline u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
{
return data;
}
@@ -73,6 +96,17 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
return page_pool_alloc_pages(pool, gfp);
}
+/**
+ * page_pool_dev_alloc_frag() - allocate a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: requested size
+ *
+ * Get a page fragment from the page allocator or page_pool caches.
+ *
+ * Return:
+ * Return allocated page fragment, otherwise return NULL.
+ */
static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
unsigned int *offset,
unsigned int size)
@@ -82,6 +116,91 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
return page_pool_alloc_frag(pool, offset, size, gfp);
}
+static inline struct page *page_pool_alloc(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size, gfp_t gfp)
+{
+ unsigned int max_size = PAGE_SIZE << pool->p.order;
+ struct page *page;
+
+ if ((*size << 1) > max_size) {
+ *size = max_size;
+ *offset = 0;
+ return page_pool_alloc_pages(pool, gfp);
+ }
+
+ page = page_pool_alloc_frag(pool, offset, *size, gfp);
+ if (unlikely(!page))
+ return NULL;
+
+ /* There is very likely not enough space for another fragment, so append
+ * the remaining size to the current fragment to avoid truesize
+ * underestimate problem.
+ */
+ if (pool->frag_offset + *size > max_size) {
+ *size = max_size - *offset;
+ pool->frag_offset = max_size;
+ }
+
+ return page;
+}
+
+/**
+ * page_pool_dev_alloc() - allocate a page or a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: in as the requested size, out as the allocated size
+ *
+ * Get a page or a page fragment from the page allocator or page_pool caches
+ * depending on the requested size in order to allocate memory with least memory
+ * utilization and performance penalty.
+ *
+ * Return:
+ * Return allocated page or page fragment, otherwise return NULL.
+ */
+static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc(pool, offset, size, gfp);
+}
+
+static inline void *page_pool_alloc_va(struct page_pool *pool,
+ unsigned int *size, gfp_t gfp)
+{
+ unsigned int offset;
+ struct page *page;
+
+ /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */
+ page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM);
+ if (unlikely(!page))
+ return NULL;
+
+ return page_address(page) + offset;
+}
+
+/**
+ * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its
+ * va.
+ * @pool: pool from which to allocate
+ * @size: in as the requested size, out as the allocated size
+ *
+ * This is just a thin wrapper around the page_pool_alloc() API, and
+ * it returns va of the allocated page or page fragment.
+ *
+ * Return:
+ * Return the va for the allocated page or page fragment, otherwise return NULL.
+ */
+static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
+ unsigned int *size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_va(pool, size, gfp);
+}
+
/**
* page_pool_get_dma_dir() - Retrieve the stored DMA direction.
* @pool: pool from which page was allocated
@@ -89,54 +208,120 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
* Get the stored dma direction. A driver might decide to store this locally
* and avoid the extra cache line from page_pool to determine the direction.
*/
-static
-inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
+static inline enum dma_data_direction
+page_pool_get_dma_dir(const struct page_pool *pool)
{
return pool->p.dma_dir;
}
-/* pp_frag_count represents the number of writers who can update the page
- * either by updating skb->data or via DMA mappings for the device.
- * We can't rely on the page refcnt for that as we don't know who might be
- * holding page references and we can't reliably destroy or sync DMA mappings
- * of the fragments.
+static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
+{
+ atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr);
+}
+
+/**
+ * page_pool_fragment_page() - split a fresh page into fragments
+ * @page: page to split
+ * @nr: references to set
+ *
+ * pp_ref_count represents the number of outstanding references to the page,
+ * which will be freed using page_pool APIs (rather than page allocator APIs
+ * like put_page()). Such references are usually held by page_pool-aware
+ * objects like skbs marked for page pool recycling.
*
- * When pp_frag_count reaches 0 we can either recycle the page if the page
- * refcnt is 1 or return it back to the memory allocator and destroy any
- * mappings we have.
+ * This helper allows the caller to take (set) multiple references to a
+ * freshly allocated page. The page must be freshly allocated (have a
+ * pp_ref_count of 1). This is commonly done by drivers and
+ * "fragment allocators" to save atomic operations - either when they know
+ * upfront how many references they will need; or to take MAX references and
+ * return the unused ones with a single atomic dec(), instead of performing
+ * multiple atomic inc() operations.
*/
static inline void page_pool_fragment_page(struct page *page, long nr)
{
- atomic_long_set(&page->pp_frag_count, nr);
+ page_pool_fragment_netmem(page_to_netmem(page), nr);
}
-static inline long page_pool_defrag_page(struct page *page, long nr)
+static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
{
+ atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem);
long ret;
- /* If nr == pp_frag_count then we have cleared all remaining
- * references to the page. No need to actually overwrite it, instead
- * we can leave this to be overwritten by the calling function.
+ /* If nr == pp_ref_count then we have cleared all remaining
+ * references to the page:
+ * 1. 'n == 1': no need to actually overwrite it.
+ * 2. 'n != 1': overwrite it with one, which is the rare case
+ * for pp_ref_count draining.
*
- * The main advantage to doing this is that an atomic_read is
- * generally a much cheaper operation than an atomic update,
- * especially when dealing with a page that may be partitioned
- * into only 2 or 3 pieces.
+ * The main advantage to doing this is that not only we avoid a atomic
+ * update, as an atomic_read is generally a much cheaper operation than
+ * an atomic update, especially when dealing with a page that may be
+ * referenced by only 2 or 3 users; but also unify the pp_ref_count
+ * handling by ensuring all pages have partitioned into only 1 piece
+ * initially, and only overwrite it when the page is partitioned into
+ * more than one piece.
*/
- if (atomic_long_read(&page->pp_frag_count) == nr)
+ if (atomic_long_read(pp_ref_count) == nr) {
+ /* As we have ensured nr is always one for constant case using
+ * the BUILD_BUG_ON(), only need to handle the non-constant case
+ * here for pp_ref_count draining, which is a rare case.
+ */
+ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
+ if (!__builtin_constant_p(nr))
+ atomic_long_set(pp_ref_count, 1);
+
return 0;
+ }
- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ ret = atomic_long_sub_return(nr, pp_ref_count);
WARN_ON(ret < 0);
+
+ /* We are the last user here too, reset pp_ref_count back to 1 to
+ * ensure all pages have been partitioned into 1 piece initially,
+ * this should be the rare case when the last two fragment users call
+ * page_pool_unref_page() currently.
+ */
+ if (unlikely(!ret))
+ atomic_long_set(pp_ref_count, 1);
+
return ret;
}
-static inline bool page_pool_is_last_frag(struct page_pool *pool,
- struct page *page)
+static inline long page_pool_unref_page(struct page *page, long nr)
+{
+ return page_pool_unref_netmem(page_to_netmem(page), nr);
+}
+
+static inline void page_pool_ref_netmem(netmem_ref netmem)
+{
+ atomic_long_inc(netmem_get_pp_ref_count_ref(netmem));
+}
+
+static inline void page_pool_ref_page(struct page *page)
+{
+ page_pool_ref_netmem(page_to_netmem(page));
+}
+
+static inline bool page_pool_is_last_ref(netmem_ref netmem)
{
- /* If fragments aren't enabled or count is 0 we were the last user */
- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
- (page_pool_defrag_page(page, 1) == 0);
+ /* If page_pool_unref_page() returns 0, we were the last user */
+ return page_pool_unref_netmem(netmem, 1) == 0;
+}
+
+static inline void page_pool_put_netmem(struct page_pool *pool,
+ netmem_ref netmem,
+ unsigned int dma_sync_size,
+ bool allow_direct)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+#ifdef CONFIG_PAGE_POOL
+ if (!page_pool_is_last_ref(netmem))
+ return;
+
+ page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct);
+#endif
}
/**
@@ -157,15 +342,15 @@ static inline void page_pool_put_page(struct page_pool *pool,
unsigned int dma_sync_size,
bool allow_direct)
{
- /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
- * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
- */
-#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(pool, page))
- return;
+ page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size,
+ allow_direct);
+}
- page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
-#endif
+static inline void page_pool_put_full_netmem(struct page_pool *pool,
+ netmem_ref netmem,
+ bool allow_direct)
+{
+ page_pool_put_netmem(pool, netmem, -1, allow_direct);
}
/**
@@ -180,7 +365,7 @@ static inline void page_pool_put_page(struct page_pool *pool,
static inline void page_pool_put_full_page(struct page_pool *pool,
struct page *page, bool allow_direct)
{
- page_pool_put_page(pool, page, -1, allow_direct);
+ page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct);
}
/**
@@ -197,31 +382,65 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
page_pool_put_full_page(pool, page, true);
}
-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \
(sizeof(dma_addr_t) > sizeof(unsigned long))
/**
+ * page_pool_free_va() - free a va into the page_pool
+ * @pool: pool from which va was allocated
+ * @va: va to be freed
+ * @allow_direct: freed by the consumer, allow lockless caching
+ *
+ * Free a va allocated from page_pool_allo_va().
+ */
+static inline void page_pool_free_va(struct page_pool *pool, void *va,
+ bool allow_direct)
+{
+ page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
+}
+
+static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
+{
+ dma_addr_t ret = netmem_get_dma_addr(netmem);
+
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+ ret <<= PAGE_SHIFT;
+
+ return ret;
+}
+
+/**
* page_pool_get_dma_addr() - Retrieve the stored DMA address.
* @page: page allocated from a page pool
*
* Fetch the DMA address of the page. The page pool to which the page belongs
* must had been created with PP_FLAG_DMA_MAP.
*/
-static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
{
- dma_addr_t ret = page->dma_addr;
-
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
-
- return ret;
+ return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
}
-static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+/**
+ * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW
+ * @pool: &page_pool the @page belongs to
+ * @page: page to sync
+ * @offset: offset from page start to "hard" start if using PP frags
+ * @dma_sync_size: size of the data written to the page
+ *
+ * Can be used as a shorthand to sync Rx pages before accessing them in the
+ * driver. Caller must ensure the pool was created with ``PP_FLAG_DMA_MAP``.
+ * Note that this version performs DMA sync unconditionally, even if the
+ * associated PP doesn't perform sync-for-device.
+ */
+static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
+ const struct page *page,
+ u32 offset, u32 dma_sync_size)
{
- page->dma_addr = addr;
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- page->dma_addr_upper = upper_32_bits(addr);
+ dma_sync_single_range_for_cpu(pool->p.dev,
+ page_pool_get_dma_addr(page),
+ offset + pool->p.offset, dma_sync_size,
+ page_pool_get_dma_dir(pool));
}
static inline bool page_pool_put(struct page_pool *pool)
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 887e7946a597..f53e2c90b686 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -5,6 +5,9 @@
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+#include <net/netmem.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
* map/unmap
@@ -17,10 +20,22 @@
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
-#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
- PP_FLAG_DMA_SYNC_DEV |\
- PP_FLAG_PAGE_FRAG)
+#define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */
+
+/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting
+ * this must be able to support unreadable netmem, where netmem_address() would
+ * return NULL. This flag should not be set for header page_pools.
+ *
+ * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set
+ * page_pool_params.slow.queue_idx.
+ */
+#define PP_FLAG_ALLOW_UNREADABLE_NETMEM BIT(3)
+
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
+ PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)
+
+/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
+#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)
/*
* Fast allocation side cache array/stack
@@ -40,12 +55,12 @@
#define PP_ALLOC_CACHE_REFILL 64
struct pp_alloc_cache {
u32 count;
- struct page *cache[PP_ALLOC_CACHE_SIZE];
+ netmem_ref cache[PP_ALLOC_CACHE_SIZE];
};
/**
* struct page_pool_params - page pool parameters
- * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG
+ * @fast: params accessed frequently on hotpath
* @order: 2^order pages on allocation
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
@@ -54,20 +69,31 @@ struct pp_alloc_cache {
* @dma_dir: DMA mapping direction
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
+ * @slow: params with slowpath access only (initialization and Netlink)
+ * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
+ * @queue_idx: queue idx this page_pool is being created for.
+ * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL,
+ * PP_FLAG_ALLOW_UNREADABLE_NETMEM.
*/
struct page_pool_params {
- unsigned int flags;
- unsigned int order;
- unsigned int pool_size;
- int nid;
- struct device *dev;
- struct napi_struct *napi;
- enum dma_data_direction dma_dir;
- unsigned int max_len;
- unsigned int offset;
+ struct_group_tagged(page_pool_params_fast, fast,
+ unsigned int order;
+ unsigned int pool_size;
+ int nid;
+ struct device *dev;
+ struct napi_struct *napi;
+ enum dma_data_direction dma_dir;
+ unsigned int max_len;
+ unsigned int offset;
+ );
+ struct_group_tagged(page_pool_params_slow, slow,
+ struct net_device *netdev;
+ unsigned int queue_idx;
+ unsigned int flags;
/* private: used by test code only */
- void (*init_callback)(struct page *page, void *arg);
- void *init_arg;
+ void (*init_callback)(netmem_ref netmem, void *arg);
+ void *init_arg;
+ );
};
#ifdef CONFIG_PAGE_POOL_STATS
@@ -120,13 +146,38 @@ struct page_pool_stats {
};
#endif
+/* The whole frag API block must stay within one cacheline. On 32-bit systems,
+ * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
+ * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
+ * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
+ * one for simplicity.
+ * Having it aligned to a cacheline boundary may be excessive and doesn't bring
+ * any good.
+ */
+#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
+
+struct pp_memory_provider_params {
+ void *mp_priv;
+};
+
struct page_pool {
- struct page_pool_params p;
+ struct page_pool_params_fast p;
+
+ int cpuid;
+ u32 pages_state_hold_cnt;
+ bool has_init_callback:1; /* slow::init_callback is set */
+ bool dma_map:1; /* Perform DMA mapping */
+ bool dma_sync:1; /* Perform DMA sync */
+#ifdef CONFIG_PAGE_POOL_STATS
+ bool system:1; /* This is a global percpu pool */
+#endif
+
+ __cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
long frag_users;
- struct page *frag_page;
+ netmem_ref frag_page;
unsigned int frag_offset;
- u32 pages_state_hold_cnt;
+ __cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
struct delayed_work release_dw;
void (*disconnect)(void *pool);
@@ -167,6 +218,10 @@ struct page_pool {
*/
struct ptr_ring ring;
+ void *mp_priv;
+
+ struct xarray dma_mapped;
+
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
struct page_pool_recycle_stats __percpu *recycle_stats;
@@ -180,34 +235,46 @@ struct page_pool {
refcount_t user_cnt;
u64 destroy_cnt;
+
+ /* Slow/Control-path information follows */
+ struct page_pool_params_slow slow;
+ /* User-facing fields, protected by page_pools_lock */
+ struct {
+ struct hlist_node list;
+ u64 detach_time;
+ u32 napi_id;
+ u32 id;
+ } user;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
+netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
+ unsigned int *offset, unsigned int size,
+ gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
+ int cpuid);
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
-void page_pool_unlink_napi(struct page_pool *pool);
+void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
- struct xdp_mem_info *mem);
+ const struct xdp_mem_info *mem);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
-static inline void page_pool_unlink_napi(struct page_pool *pool)
-{
-}
-
static inline void page_pool_destroy(struct page_pool *pool)
{
}
static inline void page_pool_use_xdp_mem(struct page_pool *pool,
void (*disconnect)(void *),
- struct xdp_mem_info *mem)
+ const struct xdp_mem_info *mem)
{
}
@@ -217,9 +284,12 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
#endif
-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
unsigned int dma_sync_size,
bool allow_direct);
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct);
static inline bool is_page_pool_compiled_in(void)
{
diff --git a/include/net/pfcp.h b/include/net/pfcp.h
new file mode 100644
index 000000000000..af14f970b80e
--- /dev/null
+++ b/include/net/pfcp.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PFCP_H_
+#define _PFCP_H_
+
+#include <uapi/linux/if_ether.h>
+#include <net/dst_metadata.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/ipv6.h>
+#include <net/udp_tunnel.h>
+#include <uapi/linux/udp.h>
+#include <uapi/linux/ip.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define PFCP_PORT 8805
+
+/* PFCP protocol header */
+struct pfcphdr {
+ u8 flags;
+ u8 message_type;
+ __be16 message_length;
+};
+
+/* PFCP header flags */
+#define PFCP_SEID_FLAG BIT(0)
+#define PFCP_MP_FLAG BIT(1)
+
+#define PFCP_VERSION_MASK GENMASK(4, 0)
+
+#define PFCP_HLEN (sizeof(struct udphdr) + sizeof(struct pfcphdr))
+
+/* PFCP node related messages */
+struct pfcphdr_node {
+ u8 seq_number[3];
+ u8 reserved;
+};
+
+/* PFCP session related messages */
+struct pfcphdr_session {
+ __be64 seid;
+ u8 seq_number[3];
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ u8 message_priority:4,
+ reserved:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ message_priprity:4;
+#else
+#error "Please fix <asm/byteorder>"
+#endif
+};
+
+struct pfcp_metadata {
+ u8 type;
+ __be64 seid;
+} __packed;
+
+enum {
+ PFCP_TYPE_NODE = 0,
+ PFCP_TYPE_SESSION = 1,
+};
+
+#define PFCP_HEADROOM (sizeof(struct iphdr) + sizeof(struct udphdr) + \
+ sizeof(struct pfcphdr) + sizeof(struct ethhdr))
+#define PFCP6_HEADROOM (sizeof(struct ipv6hdr) + sizeof(struct udphdr) + \
+ sizeof(struct pfcphdr) + sizeof(struct ethhdr))
+
+static inline struct pfcphdr *pfcp_hdr(struct sk_buff *skb)
+{
+ return (struct pfcphdr *)(udp_hdr(skb) + 1);
+}
+
+static inline struct pfcphdr_node *pfcp_hdr_node(struct sk_buff *skb)
+{
+ return (struct pfcphdr_node *)(pfcp_hdr(skb) + 1);
+}
+
+static inline struct pfcphdr_session *pfcp_hdr_session(struct sk_buff *skb)
+{
+ return (struct pfcphdr_session *)(pfcp_hdr(skb) + 1);
+}
+
+static inline bool netif_is_pfcp(const struct net_device *dev)
+{
+ return dev->rtnl_link_ops &&
+ !strcmp(dev->rtnl_link_ops->kind, "pfcp");
+}
+
+#endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index f308e8268651..4229e4fcd2a9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -24,6 +24,8 @@ struct tcf_walker {
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
void unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
+#define NET_CLS_ALIAS_PREFIX "net-cls-"
+#define MODULE_ALIAS_NET_CLS(kind) MODULE_ALIAS(NET_CLS_ALIAS_PREFIX kind)
struct tcf_block_ext_info {
enum flow_block_binder_type binder_type;
@@ -72,6 +74,15 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
return block && block->index;
}
+#ifdef CONFIG_NET_CLS_ACT
+DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
+
+static inline bool tcf_block_bypass_sw(struct tcf_block *block)
+{
+ return block && !atomic_read(&block->useswcnt);
+}
+#endif
+
static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
{
WARN_ON(tcf_block_shared(block));
@@ -480,7 +491,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
struct tcf_pkt_info *);
/**
- * tcf_em_tree_match - evaulate an ematch tree
+ * tcf_em_tree_match - evaluate an ematch tree
*
* @skb: socket buffer of the packet in question
* @tree: ematch tree to be used for evaluation
@@ -748,6 +759,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
cls_common->extack = extack;
}
+static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags)
+{
+ if (tp->usesw)
+ return;
+ if (tc_skip_sw(flags) && tc_in_hw(flags))
+ return;
+ tp->usesw = true;
+}
+
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
{
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 15960564e0c3..d7b7b6cd4aa1 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -20,10 +20,10 @@ struct qdisc_walker {
int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};
-static inline void *qdisc_priv(struct Qdisc *q)
-{
- return &q->privdata;
-}
+#define qdisc_priv(q) \
+ _Generic(q, \
+ const struct Qdisc * : (const void *)&q->privdata, \
+ struct Qdisc * : (void *)&q->privdata)
static inline struct Qdisc *qdisc_from_priv(void *priv)
{
@@ -100,6 +100,8 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
int register_qdisc(struct Qdisc_ops *qops);
void unregister_qdisc(struct Qdisc_ops *qops);
+#define NET_SCH_ALIAS_PREFIX "net-sch-"
+#define MODULE_ALIAS_NET_SCH(id) MODULE_ALIAS(NET_SCH_ALIAS_PREFIX id)
void qdisc_get_default(char *id, size_t len);
int qdisc_set_default(const char *id);
@@ -275,24 +277,6 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
}
-struct tc_skb_cb {
- struct qdisc_skb_cb qdisc_cb;
-
- u16 mru;
- u8 post_ct:1;
- u8 post_ct_snat:1;
- u8 post_ct_dnat:1;
- u16 zone; /* Only valid if post_ct = true */
-};
-
-static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
-{
- struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
-
- BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
- return cb;
-}
-
static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
unsigned long cl,
struct qdisc_walker *arg)
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
new file mode 100644
index 000000000000..a6ab2f4f5e28
--- /dev/null
+++ b/include/net/proto_memory.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PROTO_MEMORY_H
+#define _PROTO_MEMORY_H
+
+#include <net/sock.h>
+#include <net/hotdata.h>
+
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
+static inline bool sk_has_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure != NULL;
+}
+
+static inline bool
+proto_memory_pressure(const struct proto *prot)
+{
+ if (!prot->memory_pressure)
+ return false;
+ return !!READ_ONCE(*prot->memory_pressure);
+}
+
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return proto_memory_pressure(sk->sk_prot);
+}
+
+static inline bool sk_under_memory_pressure(const struct sock *sk)
+{
+ if (!sk->sk_prot->memory_pressure)
+ return false;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ return true;
+
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
+static inline long
+proto_memory_allocated(const struct proto *prot)
+{
+ return max(0L, atomic_long_read(prot->memory_allocated));
+}
+
+static inline long
+sk_memory_allocated(const struct sock *sk)
+{
+ return proto_memory_allocated(sk->sk_prot);
+}
+
+static inline void proto_memory_pcpu_drain(struct proto *proto)
+{
+ int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
+
+ if (val)
+ atomic_long_add(val, proto->memory_allocated);
+}
+
+static inline void
+sk_memory_allocated_add(const struct sock *sk, int val)
+{
+ struct proto *proto = sk->sk_prot;
+
+ val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val >= READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
+}
+
+static inline void
+sk_memory_allocated_sub(const struct sock *sk, int val)
+{
+ struct proto *proto = sk->sk_prot;
+
+ val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val <= -READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
+}
+
+#endif /* _PROTO_MEMORY_H */
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 6aef8cb11cc8..b2499f88f8f8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -46,6 +46,7 @@ struct net_protocol {
* socket lookup?
*/
icmp_strict_tag_validation:1;
+ u32 secret;
};
#if IS_ENABLED(CONFIG_IPV6)
@@ -59,6 +60,7 @@ struct inet6_protocol {
__be32 info);
unsigned int flags; /* INET6_PROTO_xxx */
+ u32 secret;
};
#define INET6_PROTO_NOPOLICY 0x1
@@ -68,6 +70,7 @@ struct inet6_protocol {
struct net_offload {
struct offload_callbacks callbacks;
unsigned int flags; /* Flags used by IPv6 for now */
+ u32 secret;
};
/* This should be set for any extension header which is compatible with GSO. */
#define INET6_PROTO_GSO_EXTHDR 0x1
diff --git a/include/net/psample.h b/include/net/psample.h
index 0509d2d6be67..5071b5fc2b59 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -24,7 +24,10 @@ struct psample_metadata {
u8 out_tc_valid:1,
out_tc_occ_valid:1,
latency_valid:1,
- unused:5;
+ rate_as_probability:1,
+ unused:4;
+ const u8 *user_cookie;
+ u32 user_cookie_len;
};
struct psample_group *psample_group_get(struct net *net, u32 group_num);
@@ -35,13 +38,15 @@ struct sk_buff;
#if IS_ENABLED(CONFIG_PSAMPLE)
-void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
- u32 sample_rate, const struct psample_metadata *md);
+void psample_sample_packet(struct psample_group *group,
+ const struct sk_buff *skb, u32 sample_rate,
+ const struct psample_metadata *md);
#else
static inline void psample_sample_packet(struct psample_group *group,
- struct sk_buff *skb, u32 sample_rate,
+ const struct sk_buff *skb,
+ u32 sample_rate,
const struct psample_metadata *md)
{
}
diff --git a/include/net/red.h b/include/net/red.h
index 425364de0df7..159a09359fc0 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -40,7 +40,7 @@
max_P should be small (not 1), usually 0.01..0.02 is good value.
max_P is chosen as a number, so that max_P/(th_max-th_min)
- is a negative power of two in order arithmetics to contain
+ is a negative power of two in order arithmetic to contain
only shifts.
@@ -159,7 +159,7 @@ static inline u32 red_maxp(u8 Plog)
static inline void red_set_vars(struct red_vars *v)
{
/* Reset average queue length, the value is strictly bound
- * to the parameters below, reseting hurts a bit but leaving
+ * to the parameters below, resetting hurts a bit but leaving
* it might result in an unreasonable qavg for a while. --TGR
*/
v->qavg = 0;
@@ -233,10 +233,10 @@ static inline void red_set_parms(struct red_parms *p,
int delta = qth_max - qth_min;
u32 max_p_delta;
- p->qth_min = qth_min << Wlog;
- p->qth_max = qth_max << Wlog;
- p->Wlog = Wlog;
- p->Plog = Plog;
+ WRITE_ONCE(p->qth_min, qth_min << Wlog);
+ WRITE_ONCE(p->qth_max, qth_max << Wlog);
+ WRITE_ONCE(p->Wlog, Wlog);
+ WRITE_ONCE(p->Plog, Plog);
if (delta <= 0)
delta = 1;
p->qth_delta = delta;
@@ -244,7 +244,7 @@ static inline void red_set_parms(struct red_parms *p,
max_P = red_maxp(Plog);
max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
}
- p->max_P = max_P;
+ WRITE_ONCE(p->max_P, max_P);
max_p_delta = max_P / delta;
max_p_delta = max(max_p_delta, 1U);
p->max_P_reciprocal = reciprocal_value(max_p_delta);
@@ -257,7 +257,7 @@ static inline void red_set_parms(struct red_parms *p,
p->target_min = qth_min + 2*delta;
p->target_max = qth_min + 3*delta;
- p->Scell_log = Scell_log;
+ WRITE_ONCE(p->Scell_log, Scell_log);
p->Scell_max = (255 << Scell_log);
if (stab)
@@ -340,7 +340,7 @@ static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p
{
/*
* NOTE: v->qavg is fixed point number with point at Wlog.
- * The formula below is equvalent to floating point
+ * The formula below is equivalent to floating point
* version:
*
* qavg = qavg*(1-W) + backlog*W;
@@ -375,7 +375,7 @@ static inline int red_mark_probability(const struct red_parms *p,
OK. qR is random number in the interval
(0..1/max_P)*(qth_max-qth_min)
i.e. 0..(2^Plog). If we used floating point
- arithmetics, it would be: (2^Plog)*rnd_num,
+ arithmetic, it would be: (2^Plog)*rnd_num,
where rnd_num is less 1.
Taking into account, that qavg have fixed
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index b2cb4a9eb04d..6633627f6e76 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -71,8 +71,6 @@ enum environment_cap {
* CRDA and can be used by other regulatory requests. When a
* the last request is not yet processed we must yield until it
* is processed before processing any new requests.
- * @country_ie_checksum: checksum of the last processed and accepted
- * country IE
* @country_ie_env: lets us know if the AP is telling us we are outdoor,
* indoor, or if it doesn't matter
* @list: used to insert into the reg_requests_list linked list
@@ -123,7 +121,7 @@ struct regulatory_request {
* @REGULATORY_DISABLE_BEACON_HINTS: enable this if your driver needs to
* ensure that passive scan flags and beaconing flags may not be lifted by
* cfg80211 due to regulatory beacon hints. For more information on beacon
- * hints read the documenation for regulatory_hint_found_beacon()
+ * hints read the documentation for regulatory_hint_found_beacon()
* @REGULATORY_COUNTRY_IE_FOLLOW_POWER: for devices that have a preference
* that even though they may have programmed their own custom power
* setting prior to wiphy registration, they want to ensure their channel
@@ -213,6 +211,7 @@ struct ieee80211_reg_rule {
u32 flags;
u32 dfs_cac_ms;
bool has_wmm;
+ s8 psd;
};
struct ieee80211_regdomain {
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 144c39db9898..b07b1cd14e9f 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -18,6 +18,7 @@
#include <linux/refcount.h>
#include <net/sock.h>
+#include <net/rstreason.h>
struct request_sock;
struct sk_buff;
@@ -34,7 +35,8 @@ struct request_sock_ops {
void (*send_ack)(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
void (*send_reset)(const struct sock *sk,
- struct sk_buff *skb);
+ struct sk_buff *skb,
+ enum sk_rst_reason reason);
void (*destructor)(struct request_sock *req);
void (*syn_ack_timeout)(const struct request_sock *req);
};
@@ -61,7 +63,11 @@ struct request_sock {
struct request_sock *dl_next;
u16 mss;
u8 num_retrans; /* number of retransmits */
- u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */
+ u8 syncookie:1; /* True if
+ * 1) tcpopts needs to be encoded in
+ * TS of SYN+ACK
+ * 2) ACK is validated by BPF kfunc.
+ */
u8 num_timeout:7; /* number of timeouts */
u32 ts_recent;
struct timer_list rsk_timer;
@@ -83,35 +89,43 @@ static inline struct sock *req_to_sk(struct request_sock *req)
return (struct sock *)req;
}
-static inline struct request_sock *
-reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
- bool attach_listener)
+/**
+ * skb_steal_sock - steal a socket from an sk_buff
+ * @skb: sk_buff to steal the socket from
+ * @refcounted: is set to true if the socket is reference-counted
+ * @prefetched: is set to true if the socket was assigned from bpf
+ */
+static inline struct sock *skb_steal_sock(struct sk_buff *skb,
+ bool *refcounted, bool *prefetched)
{
- struct request_sock *req;
+ struct sock *sk = skb->sk;
- req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
- if (!req)
+ if (!sk) {
+ *prefetched = false;
+ *refcounted = false;
return NULL;
- req->rsk_listener = NULL;
- if (attach_listener) {
- if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
- kmem_cache_free(ops->slab, req);
- return NULL;
+ }
+
+ *prefetched = skb_sk_is_prefetched(skb);
+ if (*prefetched) {
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ *refcounted = false;
+ sk = req->rsk_listener;
+ req->rsk_listener = NULL;
+ return sk;
}
- req->rsk_listener = sk_listener;
+#endif
+ *refcounted = sk_is_refcounted(sk);
+ } else {
+ *refcounted = true;
}
- req->rsk_ops = ops;
- req_to_sk(req)->sk_prot = sk_listener->sk_prot;
- sk_node_init(&req_to_sk(req)->sk_node);
- sk_tx_queue_clear(req_to_sk(req));
- req->saved_syn = NULL;
- req->timeout = 0;
- req->num_timeout = 0;
- req->num_retrans = 0;
- req->sk = NULL;
- refcount_set(&req->rsk_refcnt, 0);
- return req;
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ return sk;
}
static inline void __reqsk_free(struct request_sock *req)
@@ -125,14 +139,14 @@ static inline void __reqsk_free(struct request_sock *req)
static inline void reqsk_free(struct request_sock *req)
{
- WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
+ DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
__reqsk_free(req);
}
static inline void reqsk_put(struct request_sock *req)
{
if (refcount_dec_and_test(&req->rsk_refcnt))
- reqsk_free(req);
+ __reqsk_free(req);
}
/*
@@ -238,4 +252,16 @@ static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
return atomic_read(&queue->young);
}
+/* RFC 7323 2.3 Using the Window Scale Option
+ * The window field (SEG.WND) of every outgoing segment, with the
+ * exception of <SYN> segments, MUST be right-shifted by
+ * Rcv.Wind.Shift bits.
+ *
+ * This means the SEG.WND carried in SYNACK can not exceed 65535.
+ * We use this property to harden TCP stack while in NEW_SYN_RECV state.
+ */
+static inline u32 tcp_synack_window(const struct request_sock *req)
+{
+ return min(req->rsk_rcv_wnd, 65535U);
+}
#endif /* _REQUEST_SOCK_H */
diff --git a/include/net/route.h b/include/net/route.h
index 51a45b1887b5..8a11d19f897b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -27,6 +27,7 @@
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/ndisc.h>
+#include <net/inet_dscp.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -35,11 +36,6 @@
#include <linux/cache.h>
#include <linux/security.h>
-#define RTO_ONLINK 0x01
-
-#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
-#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
-
static inline __u8 ip_sock_rt_scope(const struct sock *sk)
{
if (sock_flag(sk, SOCK_LOCALROUTE))
@@ -50,7 +46,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk)
static inline __u8 ip_sock_rt_tos(const struct sock *sk)
{
- return RT_TOS(inet_sk(sk)->tos);
+ return READ_ONCE(inet_sk(sk)->tos) & INET_DSCP_MASK;
}
struct ip_tunnel_info;
@@ -80,6 +76,17 @@ struct rtable {
rt_pmtu:31;
};
+#define dst_rtable(_ptr) container_of_const(_ptr, struct rtable, dst)
+
+/**
+ * skb_rtable - Returns the skb &rtable
+ * @skb: buffer
+ */
+static inline struct rtable *skb_rtable(const struct sk_buff *skb)
+{
+ return dst_rtable(skb_dst(skb));
+}
+
static inline bool rt_is_input_route(const struct rtable *rt)
{
return rt->rt_is_input != 0;
@@ -136,12 +143,6 @@ static inline struct rtable *__ip_route_output_key(struct net *net,
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
const struct sock *sk);
-struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
- struct net_device *dev,
- struct net *net, __be32 *saddr,
- const struct ip_tunnel_info *info,
- u8 protocol, bool use_cache);
-
struct dst_entry *ipv4_blackhole_route(struct net *net,
struct dst_entry *dst_orig);
@@ -150,15 +151,22 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4
return ip_route_output_flow(net, flp, NULL);
}
+/* Simplistic IPv4 route lookup function.
+ * This is only suitable for some particular use cases: since the flowi4
+ * structure is only partially set, it may bypass some fib-rules.
+ */
static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
- __be32 saddr, u8 tos, int oif)
+ __be32 saddr, u8 tos, int oif,
+ __u8 scope)
{
struct flowi4 fl4 = {
.flowi4_oif = oif,
.flowi4_tos = tos,
+ .flowi4_scope = scope,
.daddr = daddr,
.saddr = saddr,
};
+
return ip_route_output_key(net, &fl4);
}
@@ -169,8 +177,8 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
__u8 proto, __u8 tos, int oif)
{
flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
- RT_SCOPE_UNIVERSE, proto,
- sk ? inet_sk_flowi_flags(sk) : 0,
+ sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE,
+ proto, sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
if (sk)
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
@@ -200,12 +208,13 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
const struct sk_buff *hint);
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
- u8 tos, struct net_device *devin)
+ dscp_t dscp, struct net_device *devin)
{
int err;
rcu_read_lock();
- err = ip_route_input_noref(skb, dst, src, tos, devin);
+ err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp),
+ devin);
if (!err) {
skb_dst_force(skb);
if (!skb_dst(skb))
@@ -258,8 +267,6 @@ static inline void ip_rt_put(struct rtable *rt)
dst_release(&rt->dst);
}
-#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
-
extern const __u8 ip_tos2prio[16];
static inline char rt_tos2priority(u8 tos)
@@ -357,10 +364,15 @@ static inline int inet_iif(const struct sk_buff *skb)
static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
{
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
- struct net *net = dev_net(dst->dev);
- if (hoplimit == 0)
+ if (hoplimit == 0) {
+ const struct net *net;
+
+ rcu_read_lock();
+ net = dev_net_rcu(dst->dev);
hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
+ rcu_read_unlock();
+ }
return hoplimit;
}
diff --git a/include/net/rps.h b/include/net/rps.h
new file mode 100644
index 000000000000..a93401d23d66
--- /dev/null
+++ b/include/net/rps.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_RPS_H
+#define _NET_RPS_H
+
+#include <linux/types.h>
+#include <linux/static_key.h>
+#include <net/sock.h>
+#include <net/hotdata.h>
+
+#ifdef CONFIG_RPS
+
+extern struct static_key_false rps_needed;
+extern struct static_key_false rfs_needed;
+
+/*
+ * This structure holds an RPS map which can be of variable length. The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+ unsigned int len;
+ struct rcu_head rcu;
+ u16 cpus[];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
+
+/*
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
+ */
+struct rps_dev_flow {
+ u16 cpu;
+ u16 filter;
+ unsigned int last_qtail;
+};
+#define RPS_NO_FILTER 0xffff
+
+/*
+ * The rps_dev_flow_table structure contains a table of flow mappings.
+ */
+struct rps_dev_flow_table {
+ unsigned int mask;
+ struct rcu_head rcu;
+ struct rps_dev_flow flows[];
+};
+#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
+ ((_num) * sizeof(struct rps_dev_flow)))
+
+/*
+ * The rps_sock_flow_table contains mappings of flows to the last CPU
+ * on which they were processed by the application (set in recvmsg).
+ * Each entry is a 32bit value. Upper part is the high-order bits
+ * of flow hash, lower part is CPU number.
+ * rps_cpu_mask is used to partition the space, depending on number of
+ * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
+ * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
+ * meaning we use 32-6=26 bits for the hash.
+ */
+struct rps_sock_flow_table {
+ u32 mask;
+
+ u32 ents[] ____cacheline_aligned_in_smp;
+};
+#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
+
+#define RPS_NO_CPU 0xffff
+
+static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+ u32 hash)
+{
+ unsigned int index = hash & table->mask;
+ u32 val = hash & ~net_hotdata.rps_cpu_mask;
+
+ /* We only give a hint, preemption can change CPU under us */
+ val |= raw_smp_processor_id();
+
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in get_rps_cpu().
+ */
+ if (READ_ONCE(table->ents[index]) != val)
+ WRITE_ONCE(table->ents[index], val);
+}
+
+#endif /* CONFIG_RPS */
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *sock_flow_table;
+
+ if (!hash)
+ return;
+ rcu_read_lock();
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
+ if (sock_flow_table)
+ rps_record_sock_flow(sock_flow_table, hash);
+ rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (static_branch_unlikely(&rfs_needed)) {
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
+ }
+#endif
+}
+
+static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ return ++sd->input_queue_tail;
+#else
+ return 0;
+#endif
+}
+
+static inline void rps_input_queue_tail_save(u32 *dest, u32 tail)
+{
+#ifdef CONFIG_RPS
+ WRITE_ONCE(*dest, tail);
+#endif
+}
+
+static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
+{
+#ifdef CONFIG_RPS
+ WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val);
+#endif
+}
+
+static inline void rps_input_queue_head_incr(struct softnet_data *sd)
+{
+ rps_input_queue_head_add(sd, 1);
+}
+
+#endif /* _NET_RPS_H */
diff --git a/include/net/rstreason.h b/include/net/rstreason.h
new file mode 100644
index 000000000000..69cb2e52b7da
--- /dev/null
+++ b/include/net/rstreason.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _LINUX_RSTREASON_H
+#define _LINUX_RSTREASON_H
+#include <net/dropreason-core.h>
+#include <uapi/linux/mptcp.h>
+
+#define DEFINE_RST_REASON(FN, FNe) \
+ FN(NOT_SPECIFIED) \
+ FN(NO_SOCKET) \
+ FN(TCP_INVALID_ACK_SEQUENCE) \
+ FN(TCP_RFC7323_PAWS) \
+ FN(TCP_TOO_OLD_ACK) \
+ FN(TCP_ACK_UNSENT_DATA) \
+ FN(TCP_FLAGS) \
+ FN(TCP_OLD_ACK) \
+ FN(TCP_ABORT_ON_DATA) \
+ FN(TCP_TIMEWAIT_SOCKET) \
+ FN(INVALID_SYN) \
+ FN(TCP_ABORT_ON_CLOSE) \
+ FN(TCP_ABORT_ON_LINGER) \
+ FN(TCP_ABORT_ON_MEMORY) \
+ FN(TCP_STATE) \
+ FN(TCP_KEEPALIVE_TIMEOUT) \
+ FN(TCP_DISCONNECT_WITH_DATA) \
+ FN(MPTCP_RST_EUNSPEC) \
+ FN(MPTCP_RST_EMPTCP) \
+ FN(MPTCP_RST_ERESOURCE) \
+ FN(MPTCP_RST_EPROHIBIT) \
+ FN(MPTCP_RST_EWQ2BIG) \
+ FN(MPTCP_RST_EBADPERF) \
+ FN(MPTCP_RST_EMIDDLEBOX) \
+ FN(ERROR) \
+ FNe(MAX)
+
+/**
+ * enum sk_rst_reason - the reasons of socket reset
+ *
+ * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols.
+ *
+ * There are three parts in order:
+ * 1) skb drop reasons: relying on drop reasons for such as passive reset
+ * 2) independent reset reasons: such as active reset reasons
+ * 3) reset reasons in MPTCP: only for MPTCP use
+ */
+enum sk_rst_reason {
+ /* Refer to include/net/dropreason-core.h
+ * Rely on skb drop reasons because it indicates exactly why RST
+ * could happen.
+ */
+ /** @SK_RST_REASON_NOT_SPECIFIED: reset reason is not specified */
+ SK_RST_REASON_NOT_SPECIFIED,
+ /** @SK_RST_REASON_NO_SOCKET: no valid socket that can be used */
+ SK_RST_REASON_NO_SOCKET,
+ /**
+ * @SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
+ * field because ack sequence is not in the window between snd_una
+ * and snd_nxt
+ */
+ SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE,
+ /**
+ * @SK_RST_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
+ * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
+ */
+ SK_RST_REASON_TCP_RFC7323_PAWS,
+ /** @SK_RST_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */
+ SK_RST_REASON_TCP_TOO_OLD_ACK,
+ /**
+ * @SK_RST_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't
+ * sent yet
+ */
+ SK_RST_REASON_TCP_ACK_UNSENT_DATA,
+ /** @SK_RST_REASON_TCP_FLAGS: TCP flags invalid */
+ SK_RST_REASON_TCP_FLAGS,
+ /** @SK_RST_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */
+ SK_RST_REASON_TCP_OLD_ACK,
+ /**
+ * @SK_RST_REASON_TCP_ABORT_ON_DATA: abort on data
+ * corresponding to LINUX_MIB_TCPABORTONDATA
+ */
+ SK_RST_REASON_TCP_ABORT_ON_DATA,
+
+ /* Here start with the independent reasons */
+ /** @SK_RST_REASON_TCP_TIMEWAIT_SOCKET: happen on the timewait socket */
+ SK_RST_REASON_TCP_TIMEWAIT_SOCKET,
+ /**
+ * @SK_RST_REASON_INVALID_SYN: receive bad syn packet
+ * RFC 793 says if the state is not CLOSED/LISTEN/SYN-SENT then
+ * "fourth, check the SYN bit,...If the SYN is in the window it is
+ * an error, send a reset"
+ */
+ SK_RST_REASON_INVALID_SYN,
+ /**
+ * @SK_RST_REASON_TCP_ABORT_ON_CLOSE: abort on close
+ * corresponding to LINUX_MIB_TCPABORTONCLOSE
+ */
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE,
+ /**
+ * @SK_RST_REASON_TCP_ABORT_ON_LINGER: abort on linger
+ * corresponding to LINUX_MIB_TCPABORTONLINGER
+ */
+ SK_RST_REASON_TCP_ABORT_ON_LINGER,
+ /**
+ * @SK_RST_REASON_TCP_ABORT_ON_MEMORY: abort on memory
+ * corresponding to LINUX_MIB_TCPABORTONMEMORY
+ */
+ SK_RST_REASON_TCP_ABORT_ON_MEMORY,
+ /**
+ * @SK_RST_REASON_TCP_STATE: abort on tcp state
+ * Please see RFC 9293 for all possible reset conditions
+ */
+ SK_RST_REASON_TCP_STATE,
+ /**
+ * @SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT: time to timeout
+ * When we have already run out of all the chances, which means
+ * keepalive timeout, we have to reset the connection
+ */
+ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT,
+ /**
+ * @SK_RST_REASON_TCP_DISCONNECT_WITH_DATA: disconnect when write
+ * queue is not empty
+ * It means user has written data into the write queue when doing
+ * disconnecting, so we have to send an RST.
+ */
+ SK_RST_REASON_TCP_DISCONNECT_WITH_DATA,
+
+ /* Copy from include/uapi/linux/mptcp.h.
+ * These reset fields will not be changed since they adhere to
+ * RFC 8684. So do not touch them. I'm going to list each definition
+ * of them respectively.
+ */
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EUNSPEC: Unspecified error.
+ * This is the default error; it implies that the subflow is no
+ * longer available. The presence of this option shows that the
+ * RST was generated by an MPTCP-aware device.
+ */
+ SK_RST_REASON_MPTCP_RST_EUNSPEC,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EMPTCP: MPTCP-specific error.
+ * An error has been detected in the processing of MPTCP options.
+ * This is the usual reason code to return in the cases where a RST
+ * is being sent to close a subflow because of an invalid response.
+ */
+ SK_RST_REASON_MPTCP_RST_EMPTCP,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_ERESOURCE: Lack of resources.
+ * This code indicates that the sending host does not have enough
+ * resources to support the terminated subflow.
+ */
+ SK_RST_REASON_MPTCP_RST_ERESOURCE,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EPROHIBIT: Administratively prohibited.
+ * This code indicates that the requested subflow is prohibited by
+ * the policies of the sending host.
+ */
+ SK_RST_REASON_MPTCP_RST_EPROHIBIT,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EWQ2BIG: Too much outstanding data.
+ * This code indicates that there is an excessive amount of data
+ * that needs to be transmitted over the terminated subflow while
+ * having already been acknowledged over one or more other subflows.
+ * This may occur if a path has been unavailable for a short period
+ * and it is more efficient to reset and start again than it is to
+ * retransmit the queued data.
+ */
+ SK_RST_REASON_MPTCP_RST_EWQ2BIG,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EBADPERF: Unacceptable performance.
+ * This code indicates that the performance of this subflow was
+ * too low compared to the other subflows of this Multipath TCP
+ * connection.
+ */
+ SK_RST_REASON_MPTCP_RST_EBADPERF,
+ /**
+ * @SK_RST_REASON_MPTCP_RST_EMIDDLEBOX: Middlebox interference.
+ * Middlebox interference has been detected over this subflow,
+ * making MPTCP signaling invalid. For example, this may be sent
+ * if the checksum does not validate.
+ */
+ SK_RST_REASON_MPTCP_RST_EMIDDLEBOX,
+
+ /** @SK_RST_REASON_ERROR: unexpected error happens */
+ SK_RST_REASON_ERROR,
+
+ /**
+ * @SK_RST_REASON_MAX: Maximum of socket reset reasons.
+ * It shouldn't be used as a real 'reason'.
+ */
+ SK_RST_REASON_MAX,
+};
+
+/* Convert skb drop reasons to enum sk_rst_reason type */
+static inline enum sk_rst_reason
+sk_rst_convert_drop_reason(enum skb_drop_reason reason)
+{
+ switch (reason) {
+ case SKB_DROP_REASON_NOT_SPECIFIED:
+ return SK_RST_REASON_NOT_SPECIFIED;
+ case SKB_DROP_REASON_NO_SOCKET:
+ return SK_RST_REASON_NO_SOCKET;
+ case SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE:
+ return SK_RST_REASON_TCP_INVALID_ACK_SEQUENCE;
+ case SKB_DROP_REASON_TCP_RFC7323_PAWS:
+ return SK_RST_REASON_TCP_RFC7323_PAWS;
+ case SKB_DROP_REASON_TCP_TOO_OLD_ACK:
+ return SK_RST_REASON_TCP_TOO_OLD_ACK;
+ case SKB_DROP_REASON_TCP_ACK_UNSENT_DATA:
+ return SK_RST_REASON_TCP_ACK_UNSENT_DATA;
+ case SKB_DROP_REASON_TCP_FLAGS:
+ return SK_RST_REASON_TCP_FLAGS;
+ case SKB_DROP_REASON_TCP_OLD_ACK:
+ return SK_RST_REASON_TCP_OLD_ACK;
+ case SKB_DROP_REASON_TCP_ABORT_ON_DATA:
+ return SK_RST_REASON_TCP_ABORT_ON_DATA;
+ default:
+ /* If we don't have our own corresponding reason */
+ return SK_RST_REASON_NOT_SPECIFIED;
+ }
+}
+#endif
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index c1fa6fee0acf..2d3eb7cb4dff 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,6 +13,7 @@ enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
+ RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */
};
enum rtnl_kinds {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 326d3a322c10..a9d7e9ecee6b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
+#include <linux/xarray.h>
struct Qdisc_ops;
struct qdisc_walker;
@@ -116,6 +117,7 @@ struct Qdisc {
struct qdisc_skb_head q;
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
+ int owner;
unsigned long state;
unsigned long state2; /* must be written under qdisc spinlock */
struct Qdisc *next_sched;
@@ -238,12 +240,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
-#ifdef CONFIG_BQL
- /* Non-BQL migrated drivers will return 0, too. */
- return dql_avail(&txq->dql);
-#else
- return 0;
-#endif
+ return netdev_queue_dql_avail(txq);
}
struct Qdisc_class_ops {
@@ -325,7 +322,6 @@ struct Qdisc_ops {
struct module *owner;
};
-
struct tcf_result {
union {
struct {
@@ -333,7 +329,6 @@ struct tcf_result {
u32 classid;
};
const struct tcf_proto *goto_tp;
-
};
};
@@ -429,6 +424,8 @@ struct tcf_proto {
*/
spinlock_t lock;
bool deleting;
+ bool counted;
+ bool usesw;
refcount_t refcnt;
struct rcu_head rcu;
struct hlist_node destroy_ht_node;
@@ -463,6 +460,7 @@ struct tcf_chain {
};
struct tcf_block {
+ struct xarray ports; /* datapath accessible */
/* Lock protects tcf_block and lifetime-management data of chains
* attached to the block (refcnt, action_refcnt, explicitly_created).
*/
@@ -477,6 +475,7 @@ struct tcf_block {
struct flow_block flow_block;
struct list_head owner_list;
bool keep_dst;
+ atomic_t useswcnt;
atomic_t offloadcnt; /* Number of oddloaded filters */
unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
@@ -489,6 +488,8 @@ struct tcf_block {
struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index);
+
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
return lockdep_is_held(&chain->filter_chain_lock);
@@ -592,6 +593,7 @@ static inline void sch_tree_unlock(struct Qdisc *q)
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
+extern const u8 sch_default_prio2band[TC_PRIO_MAX + 1];
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
extern const struct Qdisc_ops *default_qdisc_ops;
@@ -849,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
- __u64 bytes, __u32 packets)
+ __u64 bytes, __u64 packets)
{
u64_stats_update_begin(&bstats->syncp);
u64_stats_add(&bstats->bytes, bytes);
@@ -1029,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
return skb;
}
+static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue(&sch->gso_skb);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+ if (direct)
+ return __qdisc_dequeue_head(&sch->q);
+ else
+ return sch->dequeue(sch);
+}
+
static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
@@ -1041,6 +1058,37 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
return skb;
}
+struct tc_skb_cb {
+ struct qdisc_skb_cb qdisc_cb;
+ u32 drop_reason;
+
+ u16 zone; /* Only valid if post_ct = true */
+ u16 mru;
+ u8 post_ct:1;
+ u8 post_ct_snat:1;
+ u8 post_ct_dnat:1;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+ return cb;
+}
+
+static inline enum skb_drop_reason
+tcf_get_drop_reason(const struct sk_buff *skb)
+{
+ return tc_skb_cb(skb)->drop_reason;
+}
+
+static inline void tcf_set_drop_reason(const struct sk_buff *skb,
+ enum skb_drop_reason reason)
+{
+ tc_skb_cb(skb)->drop_reason = reason;
+}
+
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
diff --git a/include/net/scm.h b/include/net/scm.h
index e8c76b4be2fe..0d35c7c77a74 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -5,6 +5,7 @@
#include <linux/limits.h>
#include <linux/net.h>
#include <linux/cred.h>
+#include <linux/file.h>
#include <linux/security.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
@@ -22,9 +23,20 @@ struct scm_creds {
kgid_t gid;
};
+#ifdef CONFIG_UNIX
+struct unix_edge;
+#endif
+
struct scm_fp_list {
short count;
+ short count_unix;
short max;
+#ifdef CONFIG_UNIX
+ bool inflight;
+ bool dead;
+ struct list_head vertices;
+ struct unix_edge *edges;
+#endif
struct user_struct *user;
struct file *fp[SCM_MAX_FD];
};
@@ -208,5 +220,13 @@ static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
scm_destroy_cred(scm);
}
+static inline int scm_recv_one_fd(struct file *f, int __user *ufd,
+ unsigned int flags)
+{
+ if (!ufd)
+ return -EFAULT;
+ return receive_fd(f, ufd, flags);
+}
+
#endif /* __LINUX_NET_SCM_H */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index a2310fa995f6..84e6b9fd5610 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -28,7 +28,7 @@
#define __net_sctp_h__
/* Header Strategy.
- * Start getting some control over the header file depencies:
+ * Start getting some control over the header file dependencies:
* includes
* constants
* structs
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index 572d73fdcd5e..8034bf5febbe 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -35,10 +35,10 @@ struct sctp_sched_ops {
struct sctp_chunk *(*dequeue)(struct sctp_outq *q);
/* Called only if the chunk fit the packet */
void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk);
- /* Sched all chunks already enqueued */
- void (*sched_all)(struct sctp_stream *steam);
- /* Unched all chunks already enqueued */
- void (*unsched_all)(struct sctp_stream *steam);
+ /* Schedule all chunks already enqueued */
+ void (*sched_all)(struct sctp_stream *stream);
+ /* Unschedule all chunks already enqueued */
+ void (*unsched_all)(struct sctp_stream *stream);
};
int sctp_sched_set_sched(struct sctp_association *asoc,
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5a24d6d8522a..dcd288fa1bb6 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -242,10 +242,7 @@ struct sctp_sock {
int do_auto_asconf;
};
-static inline struct sctp_sock *sctp_sk(const struct sock *sk)
-{
- return (struct sctp_sock *)sk;
-}
+#define sctp_sk(ptr) container_of_const(ptr, struct sctp_sock, inet.sk)
static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp)
{
@@ -524,7 +521,7 @@ struct sctp_datamsg {
refcount_t refcnt;
/* When is this message no longer interesting to the peer? */
unsigned long expires_at;
- /* Did the messenge fail to send? */
+ /* Did the message fail to send? */
int send_error;
u8 send_failed:1,
can_delay:1, /* should this message be Nagle delayed */
@@ -778,6 +775,7 @@ struct sctp_transport {
/* Reference counting. */
refcount_t refcnt;
+ __u32 dead:1,
/* RTO-Pending : A flag used to track if one of the DATA
* chunks sent to this address is currently being
* used to compute a RTT. If this flag is 0,
@@ -787,7 +785,7 @@ struct sctp_transport {
* calculation completes (i.e. the DATA chunk
* is SACK'd) clear this flag.
*/
- __u32 rto_pending:1,
+ rto_pending:1,
/*
* hb_sent : a flag that signals that we have a pending
@@ -795,7 +793,7 @@ struct sctp_transport {
*/
hb_sent:1,
- /* Is the Path MTU update pending on this tranport */
+ /* Is the Path MTU update pending on this transport */
pmtu_pending:1,
dst_pending_confirm:1, /* need to confirm neighbour */
@@ -1226,7 +1224,7 @@ enum sctp_endpoint_type {
};
/*
- * A common base class to bridge the implmentation view of a
+ * A common base class to bridge the implementation view of a
* socket (usually listening) endpoint versus an association's
* local endpoint.
* This common structure is useful for several purposes:
@@ -1356,7 +1354,7 @@ struct sctp_endpoint {
struct rcu_head rcu;
};
-/* Recover the outter endpoint structure. */
+/* Recover the outer endpoint structure. */
static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
{
struct sctp_endpoint *ep;
@@ -1909,7 +1907,7 @@ struct sctp_association {
__u32 rwnd_over;
/* Keeps treack of rwnd pressure. This happens when we have
- * a window, but not recevie buffer (i.e small packets). This one
+ * a window, but not receive buffer (i.e small packets). This one
* is releases slowly (1 PMTU at a time ).
*/
__u32 rwnd_press;
@@ -1997,7 +1995,7 @@ struct sctp_association {
/* ADDIP Section 5.2 Upon reception of an ASCONF Chunk.
*
- * This is needed to implement itmes E1 - E4 of the updated
+ * This is needed to implement items E1 - E4 of the updated
* spec. Here is the justification:
*
* Since the peer may bundle multiple ASCONF chunks toward us,
@@ -2008,7 +2006,7 @@ struct sctp_association {
/* These ASCONF chunks are waiting to be sent.
*
- * These chunaks can't be pushed to outqueue until receiving
+ * These chunks can't be pushed to outqueue until receiving
* ASCONF_ACK for the previous ASCONF indicated by
* addip_last_asconf, so as to guarantee that only one ASCONF
* is in flight at any time.
@@ -2062,13 +2060,13 @@ struct sctp_association {
struct sctp_transport *new_transport;
/* SCTP AUTH: list of the endpoint shared keys. These
- * keys are provided out of band by the user applicaton
+ * keys are provided out of band by the user application
* and can't change during the lifetime of the association
*/
struct list_head endpoint_shared_keys;
/* SCTP AUTH:
- * The current generated assocaition shared key (secret)
+ * The current generated association shared key (secret)
*/
struct sctp_auth_bytes *asoc_shared_key;
struct sctp_shared_key *shkey;
@@ -2124,7 +2122,7 @@ enum {
SCTP_ASSOC_EYECATCHER = 0xa550c123,
};
-/* Recover the outter association structure. */
+/* Recover the outer association structure. */
static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base)
{
struct sctp_association *asoc;
diff --git a/include/net/seg6.h b/include/net/seg6.h
index af668f17b398..82b3fbbcbb93 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -52,10 +52,17 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
extern int seg6_init(void);
extern void seg6_exit(void);
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
extern int seg6_iptunnel_init(void);
extern void seg6_iptunnel_exit(void);
extern int seg6_local_init(void);
extern void seg6_local_exit(void);
+#else
+static inline int seg6_iptunnel_init(void) { return 0; }
+static inline void seg6_iptunnel_exit(void) {}
+static inline int seg6_local_init(void) { return 0; }
+static inline void seg6_local_exit(void) {}
+#endif
extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced);
extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags);
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
index 2b5d2ee5613e..24f733b3e3fe 100644
--- a/include/net/seg6_hmac.h
+++ b/include/net/seg6_hmac.h
@@ -49,9 +49,16 @@ extern int seg6_hmac_info_del(struct net *net, u32 key);
extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
struct ipv6_sr_hdr *srh);
extern bool seg6_hmac_validate_skb(struct sk_buff *skb);
+#ifdef CONFIG_IPV6_SEG6_HMAC
extern int seg6_hmac_init(void);
extern void seg6_hmac_exit(void);
extern int seg6_hmac_net_init(struct net *net);
extern void seg6_hmac_net_exit(struct net *net);
+#else
+static inline int seg6_hmac_init(void) { return 0; }
+static inline void seg6_hmac_exit(void) {}
+static inline int seg6_hmac_net_init(struct net *net) { return 0; }
+static inline void seg6_hmac_net_exit(struct net *net) {}
+#endif
#endif
diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
index 3fab9dec2ec4..888c1ce6f527 100644
--- a/include/net/seg6_local.h
+++ b/include/net/seg6_local.h
@@ -19,6 +19,7 @@ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb);
struct seg6_bpf_srh_state {
+ local_lock_t bh_lock;
struct ipv6_sr_hdr *srh;
u16 hdrlen;
bool valid;
diff --git a/include/net/smc.h b/include/net/smc.h
index 6fef76087b9e..db84e4e35080 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -26,9 +26,6 @@ struct smc_hashinfo {
struct hlist_head ht;
};
-int smc_hash_sk(struct sock *sk);
-void smc_unhash_sk(struct sock *sk);
-
/* SMCD/ISM device driver interface */
struct smcd_dmb {
u64 dmb_tok;
@@ -50,7 +47,6 @@ struct smcd_dmb {
#define ISM_ERROR 0xFFFF
struct smcd_dev;
-struct ism_client;
struct smcd_gid {
u64 gid;
@@ -61,13 +57,12 @@ struct smcd_ops {
int (*query_remote_gid)(struct smcd_dev *dev, struct smcd_gid *rgid,
u32 vid_valid, u32 vid);
int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb,
- struct ism_client *client);
+ void *client);
int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
int (*supports_v2)(void);
- u8* (*get_system_eid)(void);
void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid);
u16 (*get_chid)(struct smcd_dev *dev);
struct device* (*get_dev)(struct smcd_dev *dev);
@@ -87,6 +82,7 @@ struct smcd_ops {
struct smcd_dev {
const struct smcd_ops *ops;
void *priv;
+ void *client;
struct list_head list;
spinlock_t lock;
struct smc_connection **conn;
diff --git a/include/net/sock.h b/include/net/sock.h
index dc625f94ee37..b7270b6b9e9c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -76,19 +76,6 @@
* the other protocols.
*/
-/* Define this to get the SOCK_DBG debugging facility. */
-#define SOCK_DEBUGGING
-#ifdef SOCK_DEBUGGING
-#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
- printk(KERN_DEBUG msg); } while (0)
-#else
-/* Validate arguments and do nothing */
-static inline __printf(2, 3)
-void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)
-{
-}
-#endif
-
/* This is the per-socket lock. The spinlock provides a synchronization
* between user contexts and software interrupt processing, whereas the
* mini-semaphore synchronizes multiple users amongst themselves.
@@ -277,8 +264,6 @@ struct sk_filter;
* @sk_pacing_status: Pacing status (requested, handled by sch_fq)
* @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
* @sk_sndbuf: size of send buffer in bytes
- * @__sk_flags_offset: empty field used to determine location of bitfield
- * @sk_padding: unused element for alignment
* @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
* @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
@@ -352,7 +337,9 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
- * @sk_bind2_node: bind node in the bhash2 table
+ * @sk_user_frags: xarray of pages the user is holding a reference on.
+ * @sk_owner: reference to the real owner of the socket that calls
+ * sock_lock_init_class_and_name().
*/
struct sock {
/*
@@ -394,14 +381,10 @@ struct sock {
#define sk_flags __sk_common.skc_flags
#define sk_rxhash __sk_common.skc_rxhash
- /* early demux fields */
- struct dst_entry __rcu *sk_rx_dst;
- int sk_rx_dst_ifindex;
- u32 sk_rx_dst_cookie;
+ __cacheline_group_begin(sock_write_rx);
- socket_lock_t sk_lock;
atomic_t sk_drops;
- int sk_rcvlowat;
+ __s32 sk_peek_off;
struct sk_buff_head sk_error_queue;
struct sk_buff_head sk_receive_queue;
/*
@@ -418,18 +401,24 @@ struct sock {
struct sk_buff *head;
struct sk_buff *tail;
} sk_backlog;
-
#define sk_rmem_alloc sk_backlog.rmem_alloc
- int sk_forward_alloc;
- u32 sk_reserved_mem;
+ __cacheline_group_end(sock_write_rx);
+
+ __cacheline_group_begin(sock_read_rx);
+ /* early demux fields */
+ struct dst_entry __rcu *sk_rx_dst;
+ int sk_rx_dst_ifindex;
+ u32 sk_rx_dst_cookie;
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_ll_usec;
- /* ===== mostly read cache line ===== */
unsigned int sk_napi_id;
+ u16 sk_busy_poll_budget;
+ u8 sk_prefer_busy_poll;
#endif
+ u8 sk_userlocks;
int sk_rcvbuf;
- int sk_disconnects;
struct sk_filter __rcu *sk_filter;
union {
@@ -438,15 +427,33 @@ struct sock {
struct socket_wq *sk_wq_raw;
/* public: */
};
+
+ void (*sk_data_ready)(struct sock *sk);
+ long sk_rcvtimeo;
+ int sk_rcvlowat;
+ __cacheline_group_end(sock_read_rx);
+
+ __cacheline_group_begin(sock_read_rxtx);
+ int sk_err;
+ struct socket *sk_socket;
+ struct mem_cgroup *sk_memcg;
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
+ __cacheline_group_end(sock_read_rxtx);
- struct dst_entry __rcu *sk_dst_cache;
+ __cacheline_group_begin(sock_write_rxtx);
+ socket_lock_t sk_lock;
+ u32 sk_reserved_mem;
+ int sk_forward_alloc;
+ u32 sk_tsflags;
+ __cacheline_group_end(sock_write_rxtx);
+
+ __cacheline_group_begin(sock_write_tx);
+ int sk_write_pending;
atomic_t sk_omem_alloc;
int sk_sndbuf;
- /* ===== cache line for TX ===== */
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
@@ -455,22 +462,36 @@ struct sock {
struct rb_root tcp_rtx_queue;
};
struct sk_buff_head sk_write_queue;
- __s32 sk_peek_off;
- int sk_write_pending;
- __u32 sk_dst_pending_confirm;
+ u32 sk_dst_pending_confirm;
u32 sk_pacing_status; /* see enum sk_pacing */
- long sk_sndtimeo;
+ struct page_frag sk_frag;
struct timer_list sk_timer;
- __u32 sk_priority;
- __u32 sk_mark;
+
unsigned long sk_pacing_rate; /* bytes per second */
+ atomic_t sk_zckey;
+ atomic_t sk_tskey;
+ __cacheline_group_end(sock_write_tx);
+
+ __cacheline_group_begin(sock_read_tx);
unsigned long sk_max_pacing_rate;
- struct page_frag sk_frag;
+ long sk_sndtimeo;
+ u32 sk_priority;
+ u32 sk_mark;
+ struct dst_entry __rcu *sk_dst_cache;
netdev_features_t sk_route_caps;
- int sk_gso_type;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+#endif
+ u16 sk_gso_type;
+ u16 sk_gso_max_segs;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
- __u32 sk_txhash;
+ u32 sk_txhash;
+ u8 sk_pacing_shift;
+ bool sk_use_task_frag;
+ __cacheline_group_end(sock_read_tx);
/*
* Because of non atomicity rules, all
@@ -479,64 +500,44 @@ struct sock {
u8 sk_gso_disabled : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
- sk_no_check_rx : 1,
- sk_userlocks : 4;
- u8 sk_pacing_shift;
+ sk_no_check_rx : 1;
+ u8 sk_shutdown;
u16 sk_type;
u16 sk_protocol;
- u16 sk_gso_max_segs;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
- int sk_err,
- sk_err_soft;
+ int sk_err_soft;
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
- u8 sk_txrehash;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- u8 sk_prefer_busy_poll;
- u16 sk_busy_poll_budget;
-#endif
spinlock_t sk_peer_lock;
int sk_bind_phc;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
- long sk_rcvtimeo;
ktime_t sk_stamp;
#if BITS_PER_LONG==32
seqlock_t sk_stamp_seq;
#endif
- atomic_t sk_tskey;
- atomic_t sk_zckey;
- u32 sk_tsflags;
- u8 sk_shutdown;
+ int sk_disconnects;
+ u8 sk_txrehash;
u8 sk_clockid;
u8 sk_txtime_deadline_mode : 1,
sk_txtime_report_errors : 1,
sk_txtime_unused : 6;
- bool sk_use_task_frag;
- struct socket *sk_socket;
void *sk_user_data;
#ifdef CONFIG_SECURITY
void *sk_security;
#endif
struct sock_cgroup_data sk_cgrp_data;
- struct mem_cgroup *sk_memcg;
void (*sk_state_change)(struct sock *sk);
- void (*sk_data_ready)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
-#ifdef CONFIG_SOCK_VALIDATE_XMIT
- struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
- struct net_device *dev,
- struct sk_buff *skb);
-#endif
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
@@ -544,7 +545,16 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
- struct hlist_node sk_bind2_node;
+ struct xarray sk_user_frags;
+
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+ struct module *sk_owner;
+#endif
+};
+
+struct sock_bh_locked {
+ struct sock *sock;
+ local_lock_t bh_lock;
};
enum sk_pacing {
@@ -873,16 +883,6 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
-static inline void __sk_del_bind2_node(struct sock *sk)
-{
- __hlist_del(&sk->sk_bind2_node);
-}
-
-static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
-{
- hlist_add_head(&sk->sk_bind2_node, list);
-}
-
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -900,8 +900,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
-#define sk_for_each_bound_bhash2(__sk, list) \
- hlist_for_each_entry(__sk, list, sk_bind2_node)
#define sk_for_each_bound_safe(__sk, tmp, list) \
hlist_for_each_entry_safe(__sk, tmp, list, sk_bind_node)
@@ -1134,41 +1132,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
-static inline void sock_rps_record_flow_hash(__u32 hash)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, hash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void sock_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
- */
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
- }
-#endif
-}
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
@@ -1246,6 +1209,13 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
size - offsetof(struct sock, sk_node.pprev));
}
+struct proto_accept_arg {
+ int flags;
+ int err;
+ int is_empty;
+ bool kern;
+};
+
/* Networking protocol blocks we attach to sockets.
* socket layer -> transport layer interface
*/
@@ -1260,8 +1230,8 @@ struct proto {
int addr_len);
int (*disconnect)(struct sock *sk, int flags);
- struct sock * (*accept)(struct sock *sk, int flags, int *err,
- bool kern);
+ struct sock * (*accept)(struct sock *sk,
+ struct proto_accept_arg *arg);
int (*ioctl)(struct sock *sk, int cmd,
int *karg);
@@ -1423,75 +1393,6 @@ static inline int sk_under_cgroup_hierarchy(struct sock *sk,
#endif
}
-static inline bool sk_has_memory_pressure(const struct sock *sk)
-{
- return sk->sk_prot->memory_pressure != NULL;
-}
-
-static inline bool sk_under_global_memory_pressure(const struct sock *sk)
-{
- return sk->sk_prot->memory_pressure &&
- !!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline bool sk_under_memory_pressure(const struct sock *sk)
-{
- if (!sk->sk_prot->memory_pressure)
- return false;
-
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg))
- return true;
-
- return !!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline long
-proto_memory_allocated(const struct proto *prot)
-{
- return max(0L, atomic_long_read(prot->memory_allocated));
-}
-
-static inline long
-sk_memory_allocated(const struct sock *sk)
-{
- return proto_memory_allocated(sk->sk_prot);
-}
-
-/* 1 MB per cpu, in page units */
-#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
-extern int sysctl_mem_pcpu_rsv;
-
-static inline void proto_memory_pcpu_drain(struct proto *proto)
-{
- int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
-
- if (val)
- atomic_long_add(val, proto->memory_allocated);
-}
-
-static inline void
-sk_memory_allocated_add(const struct sock *sk, int val)
-{
- struct proto *proto = sk->sk_prot;
-
- val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
-
- if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv)))
- proto_memory_pcpu_drain(proto);
-}
-
-static inline void
-sk_memory_allocated_sub(const struct sock *sk, int val)
-{
- struct proto *proto = sk->sk_prot;
-
- val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
-
- if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv)))
- proto_memory_pcpu_drain(proto);
-}
-
#define SK_ALLOC_PERCPU_COUNTER_BATCH 16
static inline void sk_sockets_allocated_dec(struct sock *sk)
@@ -1518,15 +1419,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
return percpu_counter_sum_positive(prot->sockets_allocated);
}
-static inline bool
-proto_memory_pressure(struct proto *prot)
-{
- if (!prot->memory_pressure)
- return false;
- return !!READ_ONCE(*prot->memory_pressure);
-}
-
-
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
@@ -1699,6 +1591,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
sk_mem_reclaim(sk);
}
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+ __module_get(owner);
+ sk->sk_owner = owner;
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+ sk->sk_owner = NULL;
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+ module_put(sk->sk_owner);
+}
+#else
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+}
+#endif
/*
* Macro so as to not evaluate some arguments when
* lockdep is not enabled.
@@ -1708,13 +1629,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
*/
#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \
do { \
+ sk_owner_set(sk, THIS_MODULE); \
sk->sk_lock.owned = 0; \
init_waitqueue_head(&sk->sk_lock.wq); \
spin_lock_init(&(sk)->sk_lock.slock); \
debug_check_no_locks_freed((void *)&(sk)->sk_lock, \
- sizeof((sk)->sk_lock)); \
+ sizeof((sk)->sk_lock)); \
lockdep_set_class_and_name(&(sk)->sk_lock.slock, \
- (skey), (sname)); \
+ (skey), (sname)); \
lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
} while (0)
@@ -1748,7 +1670,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
* lock_sock_fast - fast version of lock_sock
* @sk: socket
*
- * This version should be used for very small section, where process wont block
+ * This version should be used for very small section, where process won't block
* return false if fast path is taken:
*
* sk_lock.slock locked, owned = 0, BH disabled
@@ -1839,12 +1761,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
static inline void sock_release_ownership(struct sock *sk)
{
- if (sock_owned_by_user_nocheck(sk)) {
- sk->sk_lock.owned = 0;
+ DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk));
+ sk->sk_lock.owned = 0;
- /* The sk_lock has mutex_unlock() semantics: */
- mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
- }
+ /* The sk_lock has mutex_unlock() semantics: */
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
}
/* no reclassification while locks are held */
@@ -1941,7 +1862,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
int sock_no_bind(struct socket *, struct sockaddr *, int);
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
-int sock_no_accept(struct socket *, struct socket *, int, bool);
+int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *);
int sock_no_getname(struct socket *, struct sockaddr *, int);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
int sock_no_listen(struct socket *, int);
@@ -2262,7 +2183,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
}
}
-bool sk_mc_loop(struct sock *sk);
+bool sk_mc_loop(const struct sock *sk);
static inline bool sk_can_gso(const struct sock *sk)
{
@@ -2567,6 +2488,12 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)
}
}
+static inline void sk_wake_async_rcu(const struct sock *sk, int how, int band)
+{
+ if (unlikely(sock_flag(sk, SOCK_FASYNC)))
+ sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
+}
+
/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
* need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak.
* Note: for send buffers, TCP works better if we can build two skbs at
@@ -2665,7 +2592,7 @@ struct sock_skb_cb {
/* Store sock_skb_cb at the end of skb->cb[] so protocol families
* using skb->cb[] would keep using it directly and utilize its
- * alignement guarantee.
+ * alignment guarantee.
*/
#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \
sizeof(struct sock_skb_cb)))
@@ -2876,31 +2803,6 @@ sk_is_refcounted(struct sock *sk)
return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
}
-/**
- * skb_steal_sock - steal a socket from an sk_buff
- * @skb: sk_buff to steal the socket from
- * @refcounted: is set to true if the socket is reference-counted
- * @prefetched: is set to true if the socket was assigned from bpf
- */
-static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
-{
- if (skb->sk) {
- struct sock *sk = skb->sk;
-
- *refcounted = true;
- *prefetched = skb_sk_is_prefetched(skb);
- if (*prefetched)
- *refcounted = sk_is_refcounted(sk);
- skb->destructor = NULL;
- skb->sk = NULL;
- return sk;
- }
- *prefetched = false;
- *refcounted = false;
- return NULL;
-}
-
/* Checks if this SKB belongs to an HW offloaded socket
* and whether any SW fallbacks are required based on dev.
* Check decrypted mark in case skb_orphan() cleared socket.
@@ -2913,12 +2815,10 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
skb = sk->sk_validate_xmit_skb(sk, dev, skb);
-#ifdef CONFIG_TLS_DEVICE
- } else if (unlikely(skb->decrypted)) {
+ } else if (unlikely(skb_is_decrypted(skb))) {
pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
kfree_skb(skb);
skb = NULL;
-#endif
}
#endif
@@ -2958,7 +2858,6 @@ extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern int sysctl_tstamp_allow_data;
-extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
@@ -3043,8 +2942,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
static inline bool sk_is_readable(struct sock *sk)
{
- if (sk->sk_prot->sock_is_readable)
- return sk->sk_prot->sock_is_readable(sk);
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot->sock_is_readable)
+ return prot->sock_is_readable(sk);
+
return false;
}
#endif /* _SOCK_H */
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 6ec140b0a61b..6e4faf3ee76f 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -26,7 +26,7 @@ struct sock_reuseport {
unsigned int bind_inany:1;
unsigned int has_conns:1;
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
- struct sock *socks[]; /* array of sock pointers */
+ struct sock *socks[] __counted_by(max_socks);
};
extern int reuseport_alloc(struct sock *sk, bool bind_inany);
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 41e2ce9e9e10..0a83010b3a64 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -43,6 +43,8 @@ struct strparser;
struct strp_callbacks {
int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+ int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
int (*read_sock_done)(struct strparser *strp, int err);
void (*abort_parser)(struct strparser *strp, int err);
void (*lock)(struct strparser *strp);
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index 1dc2f827d0bc..77f87c622a2e 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -22,6 +22,7 @@ struct tcf_ct_params {
struct nf_nat_range2 range;
bool ipv4_range;
+ bool put_labels;
u16 ct_action;
diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h
deleted file mode 100644
index 4225fcb1c6ba..000000000000
--- a/include/net/tc_act/tc_ipt.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NET_TC_IPT_H
-#define __NET_TC_IPT_H
-
-#include <net/act_api.h>
-
-struct xt_entry_target;
-
-struct tcf_ipt {
- struct tc_action common;
- u32 tcfi_hook;
- char *tcfi_tname;
- struct xt_entry_target *tcfi_t;
-};
-#define to_ipt(a) ((struct tcf_ipt *)a)
-
-#endif /* __NET_TC_IPT_H */
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 32ce8ea36950..75722d967bf2 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,6 +8,7 @@
struct tcf_mirred {
struct tc_action common;
int tcfm_eaction;
+ u32 tcfm_blockid;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
netdevice_tracker tcfm_dev_tracker;
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index a6d481b5bcbc..ffe58a02537c 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -4,7 +4,7 @@
#include <net/pkt_cls.h>
-#if IS_ENABLED(CONFIG_RETPOLINE)
+#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
#include <linux/cpufeature.h>
#include <linux/static_key.h>
@@ -117,10 +117,6 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
if (a->ops->act == tcf_ife_act)
return tcf_ife_act(skb, a, res);
#endif
-#if IS_BUILTIN(CONFIG_NET_ACT_IPT)
- if (a->ops->act == tcf_ipt_act)
- return tcf_ipt_act(skb, a, res);
-#endif
#if IS_BUILTIN(CONFIG_NET_ACT_SIMP)
if (a->ops->act == tcf_simp_act)
return tcf_simp_act(skb, a, res);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b3917af309e0..3255a199ef60 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -37,9 +37,11 @@
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>
+#include <net/tcp_ao.h>
#include <net/inet_ecn.h>
#include <net/dst.h>
#include <net/mptcp.h>
+#include <net/xfrm.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -51,6 +53,8 @@ extern struct inet_hashinfo tcp_hashinfo;
DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
int tcp_orphan_count_sum(void);
+DECLARE_PER_CPU(u32, tcp_tw_isn);
+
void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@@ -131,6 +135,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
+static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
+
#if HZ >= 100
#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN ((unsigned)(HZ/25))
@@ -164,7 +170,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_KEEPCNT 127
#define MAX_TCP_SYNCNT 127
-#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
+/* Ensure that TCP PAWS checks are relaxed after ~2147 seconds
+ * to avoid overflows. This assumes a clock smaller than 1 Mhz.
+ * Default clock is 1 Khz, tcp_usec_ts uses 1 Mhz.
+ */
+#define TCP_PAWS_WRAP (INT_MAX / USEC_PER_SEC)
+
#define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
* after this time. It should be equal
* (or greater than) TCP_TIMEWAIT_LEN
@@ -187,6 +198,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_AO 29 /* Authentication Option (RFC5925) */
#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
@@ -285,14 +297,6 @@ static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
return seq3 - seq2 >= seq1 - seq2;
}
-static inline bool tcp_out_of_memory(struct sock *sk)
-{
- if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
- sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
- return true;
- return false;
-}
-
static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sk_wmem_queued_add(sk, -skb->truesize);
@@ -305,7 +309,7 @@ static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
void sk_forced_mem_schedule(struct sock *sk, int size);
-bool tcp_check_oom(struct sock *sk, int shift);
+bool tcp_check_oom(const struct sock *sk, int shift);
extern struct proto tcp_prot;
@@ -339,7 +343,7 @@ void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
+enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
@@ -383,12 +387,13 @@ enum tcp_tw_status {
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
- const struct tcphdr *th);
+ const struct tcphdr *th,
+ u32 *tw_isn);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
bool *lost_race);
-int tcp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb);
+enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);
@@ -429,7 +434,6 @@ int tcp_mmap(struct file *file, struct socket *sock,
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
-const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
* BPF SKB-less helpers
@@ -482,13 +486,30 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
/* From syncookies.c */
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst, u32 tsoff);
-int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
- u32 cookie);
+ struct dst_entry *dst);
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
- const struct tcp_request_sock_ops *af_ops,
- struct sock *sk, struct sk_buff *skb);
+ struct sock *sk, struct sk_buff *skb,
+ struct tcp_options_received *tcp_opt,
+ int mss, u32 tsoff);
+
+#if IS_ENABLED(CONFIG_BPF)
+struct bpf_tcp_req_attrs {
+ u32 rcv_tsval;
+ u32 rcv_tsecr;
+ u16 mss;
+ u8 rcv_wscale;
+ u8 snd_wscale;
+ u8 ecn_ok;
+ u8 wscale_ok;
+ u8 sack_ok;
+ u8 tstamp_ok;
+ u8 usec_ts_ok;
+ u8 reserved[3];
+};
+#endif
+
#ifdef CONFIG_SYN_COOKIES
/* Syncookies use a monotonic timer which increments every 60 seconds.
@@ -568,18 +589,50 @@ static inline u32 tcp_cookie_time(void)
return val;
}
+/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
+static inline u64 tcp_ns_to_ts(bool usec_ts, u64 val)
+{
+ if (usec_ts)
+ return div_u64(val, NSEC_PER_USEC);
+
+ return div_u64(val, NSEC_PER_MSEC);
+}
+
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
u64 cookie_init_timestamp(struct request_sock *req, u64 now);
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *opt);
-bool cookie_ecn_ok(const struct tcp_options_received *opt,
- const struct net *net, const struct dst_entry *dst);
+
+static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst)
+{
+ return READ_ONCE(net->ipv4.sysctl_tcp_ecn) ||
+ dst_feature(dst, RTAX_FEATURE_ECN);
+}
+
+#if IS_ENABLED(CONFIG_BPF)
+static inline bool cookie_bpf_ok(struct sk_buff *skb)
+{
+ return skb->sk;
+}
+
+struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb);
+#else
+static inline bool cookie_bpf_ok(struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return NULL;
+}
+#endif
/* From net/ipv6/syncookies.c */
-int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
- u32 cookie);
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
@@ -610,7 +663,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
void tcp_send_probe0(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
-void tcp_send_active_reset(struct sock *sk, gfp_t priority);
+void tcp_send_active_reset(struct sock *sk, gfp_t priority,
+ enum sk_rst_reason reason);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
@@ -630,6 +684,19 @@ void tcp_fin(struct sock *sk);
void tcp_check_space(struct sock *sk);
void tcp_sack_compress_send_ack(struct sock *sk);
+static inline void tcp_cleanup_skb(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ secpath_reset(skb);
+}
+
+static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb)
+{
+ DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
+ DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb));
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+}
+
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
@@ -676,6 +743,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
void tcp_read_done(struct sock *sk, size_t len);
@@ -686,7 +756,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
-static inline void tcp_bound_rto(const struct sock *sk)
+static inline void tcp_bound_rto(struct sock *sk)
{
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
@@ -727,7 +797,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
-static inline u32 tcp_rto_min(struct sock *sk)
+static inline u32 tcp_rto_min(const struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
u32 rto_min = inet_csk(sk)->icsk_rto_min;
@@ -737,7 +807,7 @@ static inline u32 tcp_rto_min(struct sock *sk)
return rto_min;
}
-static inline u32 tcp_rto_min_us(struct sock *sk)
+static inline u32 tcp_rto_min_us(const struct sock *sk)
{
return jiffies_to_usecs(tcp_rto_min(sk));
}
@@ -797,22 +867,31 @@ static inline u64 tcp_clock_us(void)
return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
}
-/* This should only be used in contexts where tp->tcp_mstamp is up to date */
-static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
+static inline u64 tcp_clock_ms(void)
+{
+ return div_u64(tcp_clock_ns(), NSEC_PER_MSEC);
+}
+
+/* TCP Timestamp included in TS option (RFC 1323) can either use ms
+ * or usec resolution. Each socket carries a flag to select one or other
+ * resolution, as the route attribute could change anytime.
+ * Each flow must stick to initial resolution.
+ */
+static inline u32 tcp_clock_ts(bool usec_ts)
{
- return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+ return usec_ts ? tcp_clock_us() : tcp_clock_ms();
}
-/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
-static inline u64 tcp_ns_to_ts(u64 ns)
+static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp)
{
- return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
+ return div_u64(tp->tcp_mstamp, USEC_PER_MSEC);
}
-/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
-static inline u32 tcp_time_stamp_raw(void)
+static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp)
{
- return tcp_ns_to_ts(tcp_clock_ns());
+ if (tp->tcp_usec_ts)
+ return tp->tcp_mstamp;
+ return tcp_time_stamp_ms(tp);
}
void tcp_mstamp_refresh(struct tcp_sock *tp);
@@ -822,17 +901,30 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
return max_t(s64, t1 - t0, 0);
}
-static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
-{
- return tcp_ns_to_ts(skb->skb_mstamp_ns);
-}
-
/* provide the departure time in us unit */
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
{
return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
}
+/* Provide skb TSval in usec or ms unit */
+static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb)
+{
+ if (usec_ts)
+ return tcp_skb_timestamp_us(skb);
+
+ return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC);
+}
+
+static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw)
+{
+ return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset;
+}
+
+static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
+{
+ return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off;
+}
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
@@ -847,6 +939,19 @@ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
+/* State flags for sacked in struct tcp_skb_cb */
+enum tcp_skb_cb_sacked_flags {
+ TCPCB_SACKED_ACKED = (1 << 0), /* SKB ACK'd by a SACK block */
+ TCPCB_SACKED_RETRANS = (1 << 1), /* SKB retransmitted */
+ TCPCB_LOST = (1 << 2), /* SKB is lost */
+ TCPCB_TAGBITS = (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS |
+ TCPCB_LOST), /* All tag bits */
+ TCPCB_REPAIRED = (1 << 4), /* SKB repaired (no skb_mstamp_ns) */
+ TCPCB_EVER_RETRANS = (1 << 7), /* Ever retransmitted frame */
+ TCPCB_RETRANS = (TCPCB_SACKED_RETRANS | TCPCB_EVER_RETRANS |
+ TCPCB_REPAIRED),
+};
+
/* This is what the send packet queuing engine uses to pass
* TCP per-packet control information to the transmission code.
* We also store the host-order sequence numbers in here too.
@@ -857,13 +962,10 @@ struct tcp_skb_cb {
__u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
union {
- /* Note : tcp_tw_isn is used in input path only
- * (isn chosen by tcp_timewait_state_process())
- *
+ /* Note :
* tcp_gso_segs/size are used in write queue only,
* cf tcp_skb_pcount()/tcp_skb_mss()
*/
- __u32 tcp_tw_isn;
struct {
u16 tcp_gso_segs;
u16 tcp_gso_size;
@@ -872,15 +974,6 @@ struct tcp_skb_cb {
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
__u8 sacked; /* State flags for SACK. */
-#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
-#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
-#define TCPCB_LOST 0x04 /* SKB is lost */
-#define TCPCB_TAGBITS 0x07 /* All tag bits */
-#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */
-#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
-#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
- TCPCB_REPAIRED)
-
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
__u8 txstamp_ack:1, /* Record TX timestamp for ack? */
eor:1, /* Is skb MSG_EOR marked? */
@@ -990,9 +1083,18 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
const struct sk_buff *from)
{
+ /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */
return likely(tcp_skb_can_collapse_to(to) &&
mptcp_skb_can_collapse(to, from) &&
- skb_pure_zcopy_same(to, from));
+ skb_pure_zcopy_same(to, from) &&
+ skb_frags_readable(to) == skb_frags_readable(from));
+}
+
+static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return likely(mptcp_skb_can_collapse(to, from) &&
+ !skb_cmp_decrypted(to, from));
}
/* Events passed to congestion control interface */
@@ -1089,7 +1191,7 @@ struct tcp_congestion_ops {
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
*/
- void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+ void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs);
/* new value of cwnd after loss (required) */
@@ -1604,7 +1706,7 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
return true;
if (unlikely(!time_before32(ktime_get_seconds(),
- rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
+ rx_opt->ts_recent_stamp + TCP_PAWS_WRAP)))
return true;
/*
* Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1664,12 +1766,7 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
tp->retransmit_skb_hint = NULL;
}
-union tcp_md5_addr {
- struct in_addr a4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr a6;
-#endif
-};
+#define tcp_md5_addr tcp_ao_addr
/* - key database */
struct tcp_md5sig_key {
@@ -1713,12 +1810,39 @@ union tcp_md5sum_block {
#endif
};
-/* - pool: digest algorithm, hash description and scratch buffer */
-struct tcp_md5sig_pool {
- struct ahash_request *md5_req;
- void *scratch;
+/*
+ * struct tcp_sigpool - per-CPU pool of ahash_requests
+ * @scratch: per-CPU temporary area, that can be used between
+ * tcp_sigpool_start() and tcp_sigpool_end() to perform
+ * crypto request
+ * @req: pre-allocated ahash request
+ */
+struct tcp_sigpool {
+ void *scratch;
+ struct ahash_request *req;
};
+int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size);
+void tcp_sigpool_get(unsigned int id);
+void tcp_sigpool_release(unsigned int id);
+int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
+ const struct sk_buff *skb,
+ unsigned int header_len);
+
+/**
+ * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash
+ * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
+ * @c: returned tcp_sigpool for usage (uninitialized on failure)
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
+/**
+ * tcp_sigpool_end - enable bh and stop using tcp_sigpool
+ * @c: tcp_sigpool context that was returned by tcp_sigpool_start()
+ */
+void tcp_sigpool_end(struct tcp_sigpool *c);
+size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len);
/* - functions */
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
@@ -1731,29 +1855,31 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen, int l3index, u8 flags);
+void tcp_clear_md5_list(struct sock *sk);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
#ifdef CONFIG_TCP_MD5SIG
-#include <linux/jump_label.h>
-extern struct static_key_false_deferred tcp_md5_needed;
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
- int family);
+ int family, bool any_l3index);
static inline struct tcp_md5sig_key *
tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr, int family)
{
if (!static_branch_unlikely(&tcp_md5_needed.key))
return NULL;
- return __tcp_md5_do_lookup(sk, l3index, addr, family);
+ return __tcp_md5_do_lookup(sk, l3index, addr, family, false);
}
-enum skb_drop_reason
-tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
- const void *saddr, const void *daddr,
- int family, int dif, int sdif);
-
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup_any_l3index(const struct sock *sk,
+ const union tcp_md5_addr *addr, int family)
+{
+ if (!static_branch_unlikely(&tcp_md5_needed.key))
+ return NULL;
+ return __tcp_md5_do_lookup(sk, 0, addr, family, true);
+}
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
#else
@@ -1764,27 +1890,22 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index,
return NULL;
}
-static inline enum skb_drop_reason
-tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
- const void *saddr, const void *daddr,
- int family, int dif, int sdif)
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup_any_l3index(const struct sock *sk,
+ const union tcp_md5_addr *addr, int family)
{
- return SKB_NOT_DROPPED_YET;
+ return NULL;
}
+
#define tcp_twsk_md5_key(twsk) NULL
#endif
-bool tcp_alloc_md5sig_pool(void);
-
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
-static inline void tcp_put_md5sig_pool(void)
-{
- local_bh_enable();
-}
+int tcp_md5_alloc_sigpool(void);
+void tcp_md5_release_sigpool(void);
+void tcp_md5_add_sigpool(void);
+extern int tcp_md5_sigpool_id;
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
- unsigned int header_len);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
+int tcp_md5_hash_key(struct tcp_sigpool *hp,
const struct tcp_md5sig_key *key);
/* From tcp_fastopen.c */
@@ -1987,6 +2108,14 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc
tcp_wmem_free_skb(sk, skb);
}
+static inline void tcp_write_collapse_fence(struct sock *sk)
+{
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
+
+ if (skb)
+ TCP_SKB_CB(skb)->eor = 1;
+}
+
static inline void tcp_push_pending_frames(struct sock *sk)
{
if (tcp_send_head(sk)) {
@@ -2084,12 +2213,19 @@ void tcp_v4_destroy_sock(struct sock *sk);
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th);
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
+#ifdef CONFIG_INET
void tcp_gro_complete(struct sk_buff *skb);
+#else
+static inline void tcp_gro_complete(struct sk_buff *skb) { }
+#endif
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
@@ -2129,6 +2265,18 @@ struct tcp_sock_af_ops {
sockptr_t optval,
int optlen);
#endif
+#ifdef CONFIG_TCP_AO
+ int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen);
+ struct tcp_ao_key *(*ao_lookup)(const struct sock *sk,
+ struct sock *addr_sk,
+ int sndid, int rcvid);
+ int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk,
+ __be32 sisn, __be32 disn, bool send);
+ int (*calc_ao_hash)(char *location, struct tcp_ao_key *ao,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+#endif
};
struct tcp_request_sock_ops {
@@ -2141,6 +2289,15 @@ struct tcp_request_sock_ops {
const struct sock *sk,
const struct sk_buff *skb);
#endif
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_key *(*ao_lookup)(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid);
+ int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock *sk);
+ int (*ao_synack_hash)(char *ao_hash, struct tcp_ao_key *mkt,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne);
+#endif
#ifdef CONFIG_SYN_COOKIES
__u32 (*cookie_init_seq)(const struct sk_buff *skb,
__u16 *mss);
@@ -2148,7 +2305,8 @@ struct tcp_request_sock_ops {
struct dst_entry *(*route_req)(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req);
+ struct request_sock *req,
+ u32 tw_isn);
u32 (*init_seq)(const struct sk_buff *skb);
u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
@@ -2181,6 +2339,70 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
}
#endif
+struct tcp_key {
+ union {
+ struct {
+ struct tcp_ao_key *ao_key;
+ char *traffic_key;
+ u32 sne;
+ u8 rcv_next;
+ };
+ struct tcp_md5sig_key *md5_key;
+ };
+ enum {
+ TCP_KEY_NONE = 0,
+ TCP_KEY_MD5,
+ TCP_KEY_AO,
+ } type;
+};
+
+static inline void tcp_get_current_key(const struct sock *sk,
+ struct tcp_key *out)
+{
+#if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG)
+ const struct tcp_sock *tp = tcp_sk(sk);
+#endif
+
+#ifdef CONFIG_TCP_AO
+ if (static_branch_unlikely(&tcp_ao_needed.key)) {
+ struct tcp_ao_info *ao;
+
+ ao = rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held(sk));
+ if (ao) {
+ out->ao_key = READ_ONCE(ao->current_key);
+ out->type = TCP_KEY_AO;
+ return;
+ }
+ }
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ if (static_branch_unlikely(&tcp_md5_needed.key) &&
+ rcu_access_pointer(tp->md5sig_info)) {
+ out->md5_key = tp->af_specific->md5_lookup(sk, sk);
+ if (out->md5_key) {
+ out->type = TCP_KEY_MD5;
+ return;
+ }
+ }
+#endif
+ out->type = TCP_KEY_NONE;
+}
+
+static inline bool tcp_key_is_md5(const struct tcp_key *key)
+{
+ if (static_branch_tcp_md5())
+ return key->type == TCP_KEY_MD5;
+ return false;
+}
+
+static inline bool tcp_key_is_ao(const struct tcp_key *key)
+{
+ if (static_branch_tcp_ao())
+ return key->type == TCP_KEY_AO;
+ return false;
+}
+
int tcpv4_offload_init(void);
void tcp_v4_init(void);
@@ -2390,6 +2612,11 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct strparser;
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
+#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_INET
@@ -2517,10 +2744,10 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
-static inline void tcp_bpf_rtt(struct sock *sk)
+static inline void tcp_bpf_rtt(struct sock *sk, long mrtt, u32 srtt)
{
if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG))
- tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+ tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_RTT_CB, mrtt, srtt);
}
#if IS_ENABLED(CONFIG_SMC)
@@ -2556,4 +2783,59 @@ static inline u64 tcp_transmit_time(const struct sock *sk)
return 0;
}
+static inline int tcp_parse_auth_options(const struct tcphdr *th,
+ const u8 **md5_hash, const struct tcp_ao_hdr **aoh)
+{
+ const u8 *md5_tmp, *ao_tmp;
+ int ret;
+
+ ret = tcp_do_parse_auth_options(th, &md5_tmp, &ao_tmp);
+ if (ret)
+ return ret;
+
+ if (md5_hash)
+ *md5_hash = md5_tmp;
+
+ if (aoh) {
+ if (!ao_tmp)
+ *aoh = NULL;
+ else
+ *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2);
+ }
+
+ return 0;
+}
+
+static inline bool tcp_ao_required(struct sock *sk, const void *saddr,
+ int family, int l3index, bool stat_inc)
+{
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao_info;
+ struct tcp_ao_key *ao_key;
+
+ if (!static_branch_unlikely(&tcp_ao_needed.key))
+ return false;
+
+ ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ if (!ao_info)
+ return false;
+
+ ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1);
+ if (ao_info->ao_required || ao_key) {
+ if (stat_inc) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED);
+ atomic64_inc(&ao_info->counters.ao_required);
+ }
+ return true;
+ }
+#endif
+ return false;
+}
+
+enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
+ const struct request_sock *req, const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int dif, int sdif);
+
#endif /* _TCP_H */
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
new file mode 100644
index 000000000000..df655ce6987d
--- /dev/null
+++ b/include/net/tcp_ao.h
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _TCP_AO_H
+#define _TCP_AO_H
+
+#define TCP_AO_KEY_ALIGN 1
+#define __tcp_ao_key_align __aligned(TCP_AO_KEY_ALIGN)
+
+union tcp_ao_addr {
+ struct in_addr a4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr a6;
+#endif
+};
+
+struct tcp_ao_hdr {
+ u8 kind;
+ u8 length;
+ u8 keyid;
+ u8 rnext_keyid;
+};
+
+static inline u8 tcp_ao_hdr_maclen(const struct tcp_ao_hdr *aoh)
+{
+ return aoh->length - sizeof(struct tcp_ao_hdr);
+}
+
+struct tcp_ao_counters {
+ atomic64_t pkt_good;
+ atomic64_t pkt_bad;
+ atomic64_t key_not_found;
+ atomic64_t ao_required;
+ atomic64_t dropped_icmp;
+};
+
+struct tcp_ao_key {
+ struct hlist_node node;
+ union tcp_ao_addr addr;
+ u8 key[TCP_AO_MAXKEYLEN] __tcp_ao_key_align;
+ unsigned int tcp_sigpool_id;
+ unsigned int digest_size;
+ int l3index;
+ u8 prefixlen;
+ u8 family;
+ u8 keylen;
+ u8 keyflags;
+ u8 sndid;
+ u8 rcvid;
+ u8 maclen;
+ struct rcu_head rcu;
+ atomic64_t pkt_good;
+ atomic64_t pkt_bad;
+ u8 traffic_keys[];
+};
+
+static inline u8 *rcv_other_key(struct tcp_ao_key *key)
+{
+ return key->traffic_keys;
+}
+
+static inline u8 *snd_other_key(struct tcp_ao_key *key)
+{
+ return key->traffic_keys + key->digest_size;
+}
+
+static inline int tcp_ao_maclen(const struct tcp_ao_key *key)
+{
+ return key->maclen;
+}
+
+/* Use tcp_ao_len_aligned() for TCP header calculations */
+static inline int tcp_ao_len(const struct tcp_ao_key *key)
+{
+ return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr);
+}
+
+static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key)
+{
+ return round_up(tcp_ao_len(key), 4);
+}
+
+static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key)
+{
+ return key->digest_size;
+}
+
+static inline int tcp_ao_sizeof_key(const struct tcp_ao_key *key)
+{
+ return sizeof(struct tcp_ao_key) + (key->digest_size << 1);
+}
+
+struct tcp_ao_info {
+ /* List of tcp_ao_key's */
+ struct hlist_head head;
+ /* current_key and rnext_key are maintained on sockets
+ * in TCP_AO_ESTABLISHED states.
+ * Their purpose is to cache keys on established connections,
+ * saving needless lookups. Never dereference any of them from
+ * listen sockets.
+ * ::current_key may change in RX to the key that was requested by
+ * the peer, please use READ_ONCE()/WRITE_ONCE() in order to avoid
+ * load/store tearing.
+ * Do the same for ::rnext_key, if you don't hold socket lock
+ * (it's changed only by userspace request in setsockopt()).
+ */
+ struct tcp_ao_key *current_key;
+ struct tcp_ao_key *rnext_key;
+ struct tcp_ao_counters counters;
+ u32 ao_required :1,
+ accept_icmps :1,
+ __unused :30;
+ __be32 lisn;
+ __be32 risn;
+ /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ,
+ * that protect TCP-AO connection from replayed old TCP segments.
+ * See RFC5925 (6.2).
+ * In order to get correct SNE, there's a helper tcp_ao_compute_sne().
+ * It needs SEQ basis to understand whereabouts are lower SEQ numbers.
+ * According to that basis vector, it can provide incremented SNE
+ * when SEQ rolls over or provide decremented SNE when there's
+ * a retransmitted segment from before-rolling over.
+ * - for request sockets such basis is rcv_isn/snt_isn, which seems
+ * good enough as it's unexpected to receive 4 Gbytes on reqsk.
+ * - for full sockets the basis is rcv_nxt/snd_una. snd_una is
+ * taken instead of snd_nxt as currently it's easier to track
+ * in tcp_snd_una_update(), rather than updating SNE in all
+ * WRITE_ONCE(tp->snd_nxt, ...)
+ * - for time-wait sockets the basis is tw_rcv_nxt/tw_snd_nxt.
+ * tw_snd_nxt is not expected to change, while tw_rcv_nxt may.
+ */
+ u32 snd_sne;
+ u32 rcv_sne;
+ refcount_t refcnt; /* Protects twsk destruction */
+ struct rcu_head rcu;
+};
+
+#ifdef CONFIG_TCP_MD5SIG
+#include <linux/jump_label.h>
+extern struct static_key_false_deferred tcp_md5_needed;
+#define static_branch_tcp_md5() static_branch_unlikely(&tcp_md5_needed.key)
+#else
+#define static_branch_tcp_md5() false
+#endif
+#ifdef CONFIG_TCP_AO
+/* TCP-AO structures and functions */
+#include <linux/jump_label.h>
+extern struct static_key_false_deferred tcp_ao_needed;
+#define static_branch_tcp_ao() static_branch_unlikely(&tcp_ao_needed.key)
+#else
+#define static_branch_tcp_ao() false
+#endif
+
+#ifdef CONFIG_TCP_AO
+/* TCP-AO structures and functions */
+struct tcp4_ao_context {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ __be32 sisn;
+ __be32 disn;
+};
+
+struct tcp6_ao_context {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 sport;
+ __be16 dport;
+ __be32 sisn;
+ __be32 disn;
+};
+
+struct tcp_sigpool;
+/* Established states are fast-path and there always is current_key/rnext_key */
+#define TCP_AO_ESTABLISHED (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | \
+ TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSING)
+
+int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
+ struct tcp_ao_key *key, struct tcphdr *th,
+ __u8 *hash_location);
+int tcp_ao_hash_skb(unsigned short int family,
+ char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family,
+ sockptr_t optval, int optlen);
+struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk,
+ struct tcp_ao_info *ao,
+ int sndid, int rcvid);
+int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
+ struct request_sock *req, struct sk_buff *skb,
+ int family);
+int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
+ unsigned int len, struct tcp_sigpool *hp);
+void tcp_ao_destroy_sock(struct sock *sk, bool twsk);
+void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp);
+bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code);
+int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen);
+int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen);
+int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen);
+int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen);
+enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
+ const struct sk_buff *skb, unsigned short int family,
+ const struct request_sock *req, int l3index,
+ const struct tcp_ao_hdr *aoh);
+u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq);
+struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_ao_addr *addr,
+ int family, int sndid, int rcvid);
+int tcp_ao_hash_hdr(unsigned short family, char *ao_hash,
+ struct tcp_ao_key *key, const u8 *tkey,
+ const union tcp_ao_addr *daddr,
+ const union tcp_ao_addr *saddr,
+ const struct tcphdr *th, u32 sne);
+int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
+ const struct tcp_ao_hdr *aoh, int l3index, u32 seq,
+ struct tcp_ao_key **key, char **traffic_key,
+ bool *allocated_traffic_key, u8 *keyid, u32 *sne);
+
+/* ipv4 specific functions */
+int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen);
+struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk,
+ int sndid, int rcvid);
+int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *mkt,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne);
+int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk,
+ __be32 sisn, __be32 disn, bool send);
+int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key,
+ struct request_sock *req);
+struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid);
+int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+/* ipv6 specific functions */
+int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int nbytes);
+int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key,
+ const struct sk_buff *skb, __be32 sisn, __be32 disn);
+int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk, __be32 sisn,
+ __be32 disn, bool send);
+int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key,
+ struct request_sock *req);
+struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk,
+ struct sock *addr_sk, int sndid, int rcvid);
+struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid);
+int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen);
+int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne);
+void tcp_ao_established(struct sock *sk);
+void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb);
+void tcp_ao_connect_init(struct sock *sk);
+void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
+ struct request_sock *req, unsigned short int family);
+#else /* CONFIG_TCP_AO */
+
+static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
+ struct tcp_ao_key *key, struct tcphdr *th,
+ __u8 *hash_location)
+{
+ return 0;
+}
+
+static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
+ struct request_sock *req, unsigned short int family)
+{
+}
+
+static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int family,
+ int type, int code)
+{
+ return false;
+}
+
+static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
+ const struct sk_buff *skb, unsigned short int family,
+ const struct request_sock *req, int l3index,
+ const struct tcp_ao_hdr *aoh)
+{
+ return SKB_NOT_DROPPED_YET;
+}
+
+static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk,
+ int l3index, const union tcp_ao_addr *addr,
+ int family, int sndid, int rcvid)
+{
+ return NULL;
+}
+
+static inline void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+{
+}
+
+static inline void tcp_ao_established(struct sock *sk)
+{
+}
+
+static inline void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb)
+{
+}
+
+static inline void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw,
+ struct tcp_sock *tp)
+{
+}
+
+static inline void tcp_ao_connect_init(struct sock *sk)
+{
+}
+
+static inline int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static inline int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static inline int tcp_ao_get_repair(struct sock *sk,
+ sockptr_t optval, sockptr_t optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static inline int tcp_ao_set_repair(struct sock *sk,
+ sockptr_t optval, unsigned int optlen)
+{
+ return -ENOPROTOOPT;
+}
+#endif
+
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
+int tcp_do_parse_auth_options(const struct tcphdr *th,
+ const u8 **md5_hash, const u8 **ao_hash);
+#else
+static inline int tcp_do_parse_auth_options(const struct tcphdr *th,
+ const u8 **md5_hash, const u8 **ao_hash)
+{
+ *md5_hash = NULL;
+ *ao_hash = NULL;
+ return 0;
+}
+#endif
+
+#endif /* _TCP_AO_H */
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index cc00118acca1..d60e8148ff4c 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -22,6 +22,7 @@ enum {
TCP_LISTEN,
TCP_CLOSING, /* Now a valid state */
TCP_NEW_SYN_RECV,
+ TCP_BOUND_INACTIVE, /* Pseudo-state for inet_diag */
TCP_MAX_STATES /* Leave at the end! */
};
@@ -43,6 +44,7 @@ enum {
TCPF_LISTEN = (1 << TCP_LISTEN),
TCPF_CLOSING = (1 << TCP_CLOSING),
TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
+ TCPF_BOUND_INACTIVE = (1 << TCP_BOUND_INACTIVE),
};
#endif /* _LINUX_TCP_STATES_H */
diff --git a/include/net/tcx.h b/include/net/tcx.h
index a0f78fd5cb28..5ce0ce9e0c02 100644
--- a/include/net/tcx.h
+++ b/include/net/tcx.h
@@ -38,16 +38,11 @@ static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
return container_of(bundle, struct tcx_entry, bundle);
}
-static inline struct tcx_link *tcx_link(struct bpf_link *link)
+static inline struct tcx_link *tcx_link(const struct bpf_link *link)
{
return container_of(link, struct tcx_link, link);
}
-static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link)
-{
- return tcx_link((struct bpf_link *)link);
-}
-
void tcx_inc(void);
void tcx_dec(void);
@@ -80,9 +75,9 @@ tcx_entry_fetch(struct net_device *dev, bool ingress)
return rcu_dereference_rtnl(dev->tcx_egress);
}
-static inline struct bpf_mprog_entry *tcx_entry_create(void)
+static inline struct bpf_mprog_entry *tcx_entry_create_noprof(void)
{
- struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL);
+ struct tcx_entry *tcx = kzalloc_noprof(sizeof(*tcx), GFP_KERNEL);
if (tcx) {
bpf_mprog_bundle_init(&tcx->bundle);
@@ -90,6 +85,7 @@ static inline struct bpf_mprog_entry *tcx_entry_create(void)
}
return NULL;
}
+#define tcx_entry_create(...) alloc_hooks(tcx_entry_create_noprof(__VA_ARGS__))
static inline void tcx_entry_free(struct bpf_mprog_entry *entry)
{
diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
index 74d2b463cc95..62b3e9f2aed4 100644
--- a/include/net/timewait_sock.h
+++ b/include/net/timewait_sock.h
@@ -15,18 +15,9 @@ struct timewait_sock_ops {
struct kmem_cache *twsk_slab;
char *twsk_slab_name;
unsigned int twsk_obj_size;
- int (*twsk_unique)(struct sock *sk,
- struct sock *sktw, void *twp);
void (*twsk_destructor)(struct sock *sk);
};
-static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
-{
- if (sk->sk_prot->twsk_prot->twsk_unique != NULL)
- return sk->sk_prot->twsk_prot->twsk_unique(sk, sktw, twp);
- return 0;
-}
-
static inline void twsk_destructor(struct sock *sk)
{
if (sk->sk_prot->twsk_prot->twsk_destructor != NULL)
diff --git a/include/net/tls.h b/include/net/tls.h
index 6c642ea18050..61fef2880114 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -61,7 +61,8 @@ struct tls_rec;
#define TLS_AAD_SPACE_SIZE 13
-#define MAX_IV_SIZE 16
+#define TLS_MAX_IV_SIZE 16
+#define TLS_MAX_SALT_SIZE 4
#define TLS_TAG_SIZE 16
#define TLS_MAX_REC_SEQ_SIZE 8
#define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE
@@ -145,6 +146,7 @@ struct tls_record_info {
skb_frag_t frags[MAX_SKB_FRAGS];
};
+#define TLS_DRIVER_STATE_SIZE_TX 16
struct tls_offload_context_tx {
struct crypto_aead *aead_send;
spinlock_t lock; /* protects records list */
@@ -158,17 +160,13 @@ struct tls_offload_context_tx {
void (*sk_destruct)(struct sock *sk);
struct work_struct destruct_work;
struct tls_context *ctx;
- u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
* Currently the belief is that there is not enough
* driver specific state to justify another layer of indirection
*/
-#define TLS_DRIVER_STATE_SIZE_TX 16
+ u8 driver_state[TLS_DRIVER_STATE_SIZE_TX] __aligned(8);
};
-#define TLS_OFFLOAD_CONTEXT_SIZE_TX \
- (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX)
-
enum tls_context_flags {
/* tls_device_down was called after the netdev went down, device state
* was released, and kTLS works in software, even though rx_conf is
@@ -189,8 +187,8 @@ enum tls_context_flags {
};
struct cipher_context {
- char *iv;
- char *rec_seq;
+ char iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE];
+ char rec_seq[TLS_MAX_REC_SEQ_SIZE];
};
union tls_crypto_context {
@@ -298,6 +296,7 @@ struct tls_offload_resync_async {
u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
};
+#define TLS_DRIVER_STATE_SIZE_RX 8
struct tls_offload_context_rx {
/* sw must be the first member of tls_offload_context_rx */
struct tls_sw_context_rx sw;
@@ -321,17 +320,13 @@ struct tls_offload_context_rx {
struct tls_offload_resync_async *resync_async;
};
};
- u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
* Currently the belief is that there is not enough
* driver specific state to justify another layer of indirection
*/
-#define TLS_DRIVER_STATE_SIZE_RX 8
+ u8 driver_state[TLS_DRIVER_STATE_SIZE_RX] __aligned(8);
};
-#define TLS_OFFLOAD_CONTEXT_SIZE_RX \
- (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
-
struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
u32 seq, u64 *p_record_sn);
@@ -367,7 +362,7 @@ static inline bool tls_is_skb_tx_device_offloaded(const struct sk_buff *skb)
static inline struct tls_context *tls_get_ctx(const struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
/* Use RCU on icsk_ulp_data only for sock diag code,
* TLS data path doesn't need rcu_dereference().
diff --git a/include/net/udp.h b/include/net/udp.h
index 488a6d2babcc..61222545ab1c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -79,7 +79,8 @@ struct udp_table {
extern struct udp_table udp_table;
void udp_table_init(struct udp_table *, const char *);
static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
- struct net *net, unsigned int num)
+ const struct net *net,
+ unsigned int num)
{
return &table->hash[udp_hashfn(net, num, table->mask)];
}
@@ -231,7 +232,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
}
/* Since this is being sent on the wire obfuscate hash a bit
- * to minimize possbility that any useful information to an
+ * to minimize possibility that any useful information to an
* attacker is leaked. Only upper 16 bits are relevant in the
* computation for 16 bit port value.
*/
@@ -245,7 +246,7 @@ static inline int udp_rqueue_get(struct sock *sk)
return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit);
}
-static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+static inline bool udp_sk_bound_dev_eq(const struct net *net, int bound_dev_if,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
@@ -296,18 +297,19 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen,
int (*push_pending_frames)(struct sock *));
-struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
+struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
+struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
+ __be16 sport,
__be32 daddr, __be16 dport, int dif, int sdif,
struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
-struct sock *udp6_lib_lookup(struct net *net,
+struct sock *udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif);
-struct sock *__udp6_lib_lookup(struct net *net,
+struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *tbl,
@@ -379,14 +381,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb)
static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
struct iov_iter *to)
{
- int n;
-
- n = copy_to_iter(skb->data + off, len, to);
- if (n == len)
- return 0;
-
- iov_iter_revert(to, n);
- return -EFAULT;
+ return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT;
}
/*
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 29251c3519cf..a93dc51f6323 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -154,16 +154,33 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck);
void udp_tunnel_sock_release(struct socket *sock);
+struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net, int oif,
+ __be32 *saddr,
+ const struct ip_tunnel_key *key,
+ __be16 sport, __be16 dport, u8 tos,
+ struct dst_cache *dst_cache);
+struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net,
+ struct socket *sock, int oif,
+ struct in6_addr *saddr,
+ const struct ip_tunnel_key *key,
+ __be16 sport, __be16 dport, u8 dsfield,
+ struct dst_cache *dst_cache);
+
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
- __be16 flags, __be64 tunnel_id,
- int md_size);
+ const unsigned long *flags,
+ __be64 tunnel_id, int md_size);
#ifdef CONFIG_INET
static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6a9f8a5f387c..33ba6fc151cf 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -210,22 +210,23 @@ struct vxlan_rdst {
};
struct vxlan_config {
- union vxlan_addr remote_ip;
- union vxlan_addr saddr;
- __be32 vni;
- int remote_ifindex;
- int mtu;
- __be16 dst_port;
- u16 port_min;
- u16 port_max;
- u8 tos;
- u8 ttl;
- __be32 label;
- u32 flags;
- unsigned long age_interval;
- unsigned int addrmax;
- bool no_share;
- enum ifla_vxlan_df df;
+ union vxlan_addr remote_ip;
+ union vxlan_addr saddr;
+ __be32 vni;
+ int remote_ifindex;
+ int mtu;
+ __be16 dst_port;
+ u16 port_min;
+ u16 port_max;
+ u8 tos;
+ u8 ttl;
+ __be32 label;
+ enum ifla_vxlan_label_policy label_policy;
+ u32 flags;
+ unsigned long age_interval;
+ unsigned int addrmax;
+ bool no_share;
+ enum ifla_vxlan_df df;
};
enum {
diff --git a/include/net/x25.h b/include/net/x25.h
index 597eb53c471e..5e833cfc864e 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -81,7 +81,7 @@ enum {
#define X25_DEFAULT_WINDOW_SIZE 2 /* Default Window Size */
#define X25_DEFAULT_PACKET_SIZE X25_PS128 /* Default Packet Size */
-#define X25_DEFAULT_THROUGHPUT 0x0A /* Deafult Throughput */
+#define X25_DEFAULT_THROUGHPUT 0x0A /* Default Throughput */
#define X25_DEFAULT_REVERSE 0x00 /* Default Reverse Charging */
#define X25_SMODULUS 8
diff --git a/include/net/xdp.h b/include/net/xdp.h
index de08c8e0d134..e6770dd40c91 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -16,7 +16,7 @@
*
* The XDP RX-queue info (xdp_rxq_info) is associated with the driver
* level RX-ring queues. It is information that is specific to how
- * the driver have configured a given RX-ring queue.
+ * the driver has configured a given RX-ring queue.
*
* Each xdp_buff frame received in the driver carries a (pointer)
* reference to this xdp_rxq_info structure. This provides the XDP
@@ -32,7 +32,7 @@
* The struct is not directly tied to the XDP prog. A new XDP prog
* can be attached as long as it doesn't change the underlying
* RX-ring. If the RX-ring does change significantly, the NIC driver
- * naturally need to stop the RX-ring before purging and reallocating
+ * naturally needs to stop the RX-ring before purging and reallocating
* memory. In that process the driver MUST call unregister (which
* also applies for driver shutdown and unload). The register API is
* also mandatory during RX-ring setup.
@@ -369,7 +369,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
static inline bool xdp_metalen_invalid(unsigned long metalen)
{
- return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
+ unsigned long meta_max;
+
+ meta_max = type_max(typeof_member(struct skb_shared_info, meta_len));
+ BUILD_BUG_ON(!__builtin_constant_p(meta_max));
+
+ return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max;
}
struct xdp_attachment_info {
@@ -383,14 +388,29 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
+/* Define the relationship between xdp-rx-metadata kfunc and
+ * various other entities:
+ * - xdp_rx_metadata enum
+ * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml)
+ * - kfunc name
+ * - xdp_metadata_ops field
+ */
#define XDP_METADATA_KFUNC_xxx \
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
- bpf_xdp_metadata_rx_timestamp) \
+ NETDEV_XDP_RX_METADATA_TIMESTAMP, \
+ bpf_xdp_metadata_rx_timestamp, \
+ xmo_rx_timestamp) \
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
- bpf_xdp_metadata_rx_hash) \
-
-enum {
-#define XDP_METADATA_KFUNC(name, _) name,
+ NETDEV_XDP_RX_METADATA_HASH, \
+ bpf_xdp_metadata_rx_hash, \
+ xmo_rx_hash) \
+ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \
+ NETDEV_XDP_RX_METADATA_VLAN_TAG, \
+ bpf_xdp_metadata_rx_vlan_tag, \
+ xmo_rx_vlan_tag) \
+
+enum xdp_rx_metadata {
+#define XDP_METADATA_KFUNC(name, _, __, ___) name,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
MAX_XDP_METADATA_KFUNC,
@@ -416,6 +436,7 @@ enum xdp_rss_hash_type {
XDP_RSS_L4_UDP = BIT(5),
XDP_RSS_L4_SCTP = BIT(6),
XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */
+ XDP_RSS_L4_ICMP = BIT(8),
/* Second part: RSS hash type combinations used for driver HW mapping */
XDP_RSS_TYPE_NONE = 0,
@@ -431,11 +452,13 @@ enum xdp_rss_hash_type {
XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR,
@@ -446,6 +469,8 @@ struct xdp_metadata_ops {
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);
+ int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci);
};
#ifdef CONFIG_NET
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 69b472604b86..df3f5f07bc7c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
+ u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
@@ -63,8 +64,12 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head tx_list;
- /* Protects generic receive. */
- spinlock_t rx_lock;
+ /* record the number of tx descriptors sent by this xsk and
+ * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs
+ * to be given to other xsks for sending tx descriptors, thereby
+ * preventing other XSKs from being starved.
+ */
+ u32 tx_budget_spent;
/* Statistics */
u64 rx_dropped;
@@ -85,11 +90,106 @@ struct xdp_sock {
struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
};
+/*
+ * AF_XDP TX metadata hooks for network devices.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
+ *
+ * void (*tmo_request_timestamp)(void *priv)
+ * Called when AF_XDP frame requested egress timestamp.
+ *
+ * u64 (*tmo_fill_timestamp)(void *priv)
+ * Called when AF_XDP frame, that had requested egress timestamp,
+ * received a completion. The hook needs to return the actual HW timestamp.
+ *
+ * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv)
+ * Called when AF_XDP frame requested HW checksum offload. csum_start
+ * indicates position where checksumming should start.
+ * csum_offset indicates position where checksum should be stored.
+ *
+ */
+struct xsk_tx_metadata_ops {
+ void (*tmo_request_timestamp)(void *priv);
+ u64 (*tmo_fill_timestamp)(void *priv);
+ void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
+};
+
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
-void __xsk_map_flush(void);
+void __xsk_map_flush(struct list_head *flush_list);
+
+/**
+ * xsk_tx_metadata_to_compl - Save enough relevant metadata information
+ * to perform tx completion in the future.
+ * @meta: pointer to AF_XDP metadata area
+ * @compl: pointer to output struct xsk_tx_metadata_to_compl
+ *
+ * This function should be called by the networking device when
+ * it prepares AF_XDP egress packet. The value of @compl should be stored
+ * and passed to xsk_tx_metadata_complete upon TX completion.
+ */
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+ struct xsk_tx_metadata_compl *compl)
+{
+ if (!meta)
+ return;
+
+ if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+ compl->tx_timestamp = &meta->completion.tx_timestamp;
+ else
+ compl->tx_timestamp = NULL;
+}
+
+/**
+ * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission
+ * and call appropriate xsk_tx_metadata_ops operation.
+ * @meta: pointer to AF_XDP metadata area
+ * @ops: pointer to struct xsk_tx_metadata_ops
+ * @priv: pointer to driver-private aread
+ *
+ * This function should be called by the networking device when
+ * it prepares AF_XDP egress packet.
+ */
+static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+ if (!meta)
+ return;
+
+ if (ops->tmo_request_timestamp)
+ if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+ ops->tmo_request_timestamp(priv);
+
+ if (ops->tmo_request_checksum)
+ if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM)
+ ops->tmo_request_checksum(meta->request.csum_start,
+ meta->request.csum_offset, priv);
+}
+
+/**
+ * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion
+ * and call appropriate xsk_tx_metadata_ops operation.
+ * @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl
+ * @ops: pointer to struct xsk_tx_metadata_ops
+ * @priv: pointer to driver-private aread
+ *
+ * This function should be called by the networking device upon
+ * AF_XDP egress completion.
+ */
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+ if (!compl)
+ return;
+ if (!compl->tx_timestamp)
+ return;
+
+ *compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
+}
#else
@@ -103,10 +203,26 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
return -EOPNOTSUPP;
}
-static inline void __xsk_map_flush(void)
+static inline void __xsk_map_flush(struct list_head *flush_list)
{
}
-#endif /* CONFIG_XDP_SOCKETS */
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+ struct xsk_tx_metadata_compl *compl)
+{
+}
+static inline void xsk_tx_metadata_request(struct xsk_tx_metadata *meta,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+}
+
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+}
+
+#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 5425f7ad5ebd..0a5dca2b2b3f 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -12,6 +12,12 @@
#define XDP_UMEM_MIN_CHUNK_SHIFT 11
#define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT)
+struct xsk_cb_desc {
+ void *src;
+ u8 off;
+ u8 bytes;
+};
+
#ifdef CONFIG_XDP_SOCKETS
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
@@ -47,6 +53,12 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
xp_set_rxq_info(pool, rxq);
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+ xp_fill_cb(pool, desc);
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -183,12 +195,33 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return xp_raw_get_data(pool, addr);
}
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
+#define XDP_TXMD_FLAGS_VALID ( \
+ XDP_TXMD_FLAGS_TIMESTAMP | \
+ XDP_TXMD_FLAGS_CHECKSUM | \
+ 0)
+
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
{
- struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ struct xsk_tx_metadata *meta;
+
+ if (!pool->tx_metadata_len)
+ return NULL;
+
+ meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+ if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
+ return NULL; /* no way to signal the error to the user */
+
+ return meta;
+}
- if (!pool->dma_need_sync)
- return;
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+{
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
xp_dma_sync_for_cpu(xskb);
}
@@ -268,6 +301,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
{
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
return 0;
@@ -351,7 +389,17 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return NULL;
}
-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+{
+ return false;
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ return NULL;
+}
+
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
{
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 93a9866ee481..1484dd15a369 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -51,8 +51,10 @@
#ifdef CONFIG_XFRM_STATISTICS
#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
+#define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val)
#else
#define XFRM_INC_STATS(net, field) ((void)(net))
+#define XFRM_ADD_STATS(net, field, val) ((void)(net))
#endif
@@ -65,27 +67,27 @@
- instance of a transformer, struct xfrm_state (=SA)
- template to clone xfrm_state, struct xfrm_tmpl
- SPD is plain linear list of xfrm_policy rules, ordered by priority.
+ SPD is organized as hash table (for policies that meet minimum address prefix
+ length setting, net->xfrm.policy_hthresh). Other policies are stored in
+ lists, sorted into rbtree ordered by destination and source address networks.
+ See net/xfrm/xfrm_policy.c for details.
+
(To be compatible with existing pfkeyv2 implementations,
many rules with priority of 0x7fffffff are allowed to exist and
such rules are ordered in an unpredictable way, thanks to bsd folks.)
- Lookup is plain linear search until the first match with selector.
-
If "action" is "block", then we prohibit the flow, otherwise:
if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
policy entry has list of up to XFRM_MAX_DEPTH transformations,
described by templates xfrm_tmpl. Each template is resolved
to a complete xfrm_state (see below) and we pack bundle of transformations
- to a dst_entry returned to requestor.
+ to a dst_entry returned to requester.
dst -. xfrm .-> xfrm_state #1
|---. child .-> dst -. xfrm .-> xfrm_state #2
|---. child .-> dst -. xfrm .-> xfrm_state #3
|---. child .-> NULL
- Bundles are cached at xrfm_policy struct (field ->bundles).
-
Resolution of xrfm_tmpl
-----------------------
@@ -182,10 +184,13 @@ struct xfrm_state {
};
struct hlist_node byspi;
struct hlist_node byseq;
+ struct hlist_node state_cache;
+ struct hlist_node state_cache_input;
refcount_t refcnt;
spinlock_t lock;
+ u32 pcpu_num;
struct xfrm_id id;
struct xfrm_selector sel;
struct xfrm_mark mark;
@@ -228,7 +233,10 @@ struct xfrm_state {
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
- struct sock __rcu *encap_sk;
+
+ /* NAT keepalive */
+ u32 nat_keepalive_interval; /* seconds */
+ time64_t nat_keepalive_expiration;
/* Data for care-of address */
xfrm_address_t *coaddr;
@@ -292,6 +300,7 @@ struct xfrm_state {
/* Private data of this transformer, format is opaque,
* interpreted by xfrm_type methods. */
void *data;
+ u8 dir;
};
static inline struct net *xs_net(struct xfrm_state *x)
@@ -524,11 +533,44 @@ struct xfrm_policy_queue {
unsigned long timeout;
};
+/**
+ * struct xfrm_policy - xfrm policy
+ * @xp_net: network namespace the policy lives in
+ * @bydst: hlist node for SPD hash table or rbtree list
+ * @byidx: hlist node for index hash table
+ * @state_cache_list: hlist head for policy cached xfrm states
+ * @lock: serialize changes to policy structure members
+ * @refcnt: reference count, freed once it reaches 0
+ * @pos: kernel internal tie-breaker to determine age of policy
+ * @timer: timer
+ * @genid: generation, used to invalidate old policies
+ * @priority: priority, set by userspace
+ * @index: policy index (autogenerated)
+ * @if_id: virtual xfrm interface id
+ * @mark: packet mark
+ * @selector: selector
+ * @lft: liftime configuration data
+ * @curlft: liftime state
+ * @walk: list head on pernet policy list
+ * @polq: queue to hold packets while aqcuire operaion in progress
+ * @bydst_reinsert: policy tree node needs to be merged
+ * @type: XFRM_POLICY_TYPE_MAIN or _SUB
+ * @action: XFRM_POLICY_ALLOW or _BLOCK
+ * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP
+ * @xfrm_nr: number of used templates in @xfrm_vec
+ * @family: protocol family
+ * @security: SELinux security label
+ * @xfrm_vec: array of templates to resolve state
+ * @rcu: rcu head, used to defer memory release
+ * @xdo: hardware offload state
+ */
struct xfrm_policy {
possible_net_t xp_net;
struct hlist_node bydst;
struct hlist_node byidx;
+ struct hlist_head state_cache_list;
+
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
@@ -553,7 +595,6 @@ struct xfrm_policy {
u16 family;
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
- struct hlist_node bydst_inexact_list;
struct rcu_head rcu;
struct xfrm_dev_offload xdo;
@@ -1014,7 +1055,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
struct xfrm_if_parms {
int link; /* ifindex of underlying L2 interface */
- u32 if_id; /* interface identifyer */
+ u32 if_id; /* interface identifier */
bool collect_md;
};
@@ -1181,9 +1222,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
if (xo) {
x = xfrm_input_state(skb);
- if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
- return (xo->flags & CRYPTO_DONE) &&
- (xo->status & CRYPTO_SUCCESS);
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+ bool check = (xo->flags & CRYPTO_DONE) &&
+ (xo->status & CRYPTO_SUCCESS);
+
+ /* The packets here are plain ones and secpath was
+ * needed to indicate that hardware already handled
+ * them and there is no need to do nothing in addition.
+ *
+ * Consume secpath which was set by drivers.
+ */
+ secpath_reset(skb);
+ return check;
+ }
}
return __xfrm_check_nopolicy(net, skb, dir) ||
@@ -1218,20 +1269,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
}
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse);
-static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 0);
+ return __xfrm_decode_session(net, skb, fl, family, 0);
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 1);
+ return __xfrm_decode_session(net, skb, fl, family, 1);
}
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
@@ -1307,7 +1358,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
{
return 1;
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
@@ -1588,22 +1639,20 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
unsigned short family);
int xfrm_state_check_expire(struct xfrm_state *x);
+void xfrm_state_update_stats(struct net *net);
#ifdef CONFIG_XFRM_OFFLOAD
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x)
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
{
struct xfrm_dev_offload *xdo = &x->xso;
struct net_device *dev = READ_ONCE(xdo->dev);
- if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
- return;
-
if (dev && dev->xfrmdev_ops &&
- dev->xfrmdev_ops->xdo_dev_state_update_curlft)
- dev->xfrmdev_ops->xdo_dev_state_update_curlft(x);
+ dev->xfrmdev_ops->xdo_dev_state_update_stats)
+ dev->xfrmdev_ops->xdo_dev_state_update_stats(x);
}
#else
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {}
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {}
#endif
void xfrm_state_insert(struct xfrm_state *x);
int xfrm_state_add(struct xfrm_state *x);
@@ -1611,6 +1660,10 @@ int xfrm_state_update(struct xfrm_state *x);
struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family);
struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -1650,7 +1703,7 @@ struct xfrmk_spdinfo {
u32 spdhmcnt;
};
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
@@ -1680,7 +1733,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif
void xfrm_local_error(struct sk_buff *skb, int mtu);
-int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
@@ -1700,7 +1752,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t);
int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
@@ -1723,6 +1774,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
int optlen);
#else
@@ -1760,7 +1815,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
struct netlink_ext_ack *extack);
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
- u8 mode, u32 reqid, u32 if_id, u8 proto,
+ u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create,
unsigned short family);
@@ -2155,7 +2210,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
proto = sk->sk_protocol;
if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
- return inet6_sk(sk)->dontfrag;
+ return inet6_test_bit(DONTFRAG, sk);
return false;
}
@@ -2177,4 +2232,19 @@ static inline int register_xfrm_interface_bpf(void)
#endif
+#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
+int register_xfrm_state_bpf(void);
+#else
+static inline int register_xfrm_state_bpf(void)
+{
+ return 0;
+}
+#endif
+
+int xfrm_nat_keepalive_init(unsigned short family);
+void xfrm_nat_keepalive_fini(unsigned short family);
+int xfrm_nat_keepalive_net_init(struct net *net);
+int xfrm_nat_keepalive_net_fini(struct net *net);
+void xfrm_nat_keepalive_state_updated(struct xfrm_state *x);
+
#endif /* _NET_XFRM_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..823fd5c7a3b1 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -12,6 +12,7 @@
struct xsk_buff_pool;
struct xdp_rxq_info;
+struct xsk_cb_desc;
struct xsk_queue;
struct xdp_desc;
struct xdp_umem;
@@ -33,6 +34,7 @@ struct xdp_buff_xsk {
};
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
+#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t))
struct xsk_dma_map {
dma_addr_t *dma_pages;
@@ -41,7 +43,6 @@ struct xsk_dma_map {
refcount_t users;
struct list_head list; /* Protected by the RTNL_LOCK */
u32 dma_pages_cnt;
- bool dma_need_sync;
};
struct xsk_buff_pool {
@@ -54,6 +55,8 @@ struct xsk_buff_pool {
refcount_t users;
struct xdp_umem *umem;
struct work_struct work;
+ /* Protects generic receive in shared and non-shared umem mode. */
+ spinlock_t rx_lock;
struct list_head free_list;
struct list_head xskb_list;
u32 heads_cnt;
@@ -77,10 +80,11 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
- bool dma_need_sync;
bool unaligned;
+ bool tx_sw_csum;
void *addrs;
/* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
* NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
@@ -132,6 +136,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
/* AF_XDP ZC drivers, via xdp_sock_buff.h */
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc);
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
unsigned long attrs, struct page **pages, u32 nr_pages);
void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
@@ -150,21 +155,17 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
return xskb->frame_dma;
}
-void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
{
- xp_dma_sync_for_cpu_slow(xskb);
+ dma_sync_single_for_cpu(xskb->pool->dev, xskb->dma,
+ xskb->pool->frame_len,
+ DMA_BIDIRECTIONAL);
}
-void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
- size_t size);
static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma, size_t size)
{
- if (!pool->dma_need_sync)
- return;
-
- xp_dma_sync_for_device_slow(pool, dma, size);
+ dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL);
}
/* Masks for xdp_umem_page flags.
@@ -233,4 +234,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}
+static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)
+{
+ return pool->tx_metadata_len > 0;
+}
+
#endif /* XSK_BUFF_POOL_H_ */