author	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2026-04-20 04:28:57 +0300
committer	Dmitry Torokhov <dmitry.torokhov@gmail.com>	2026-04-20 04:28:57 +0300
commit	f4b369c6fe0ceaba2da2daff8c9eb415f85926dd (patch)
tree	30465d0a429b2c224685b5d8e804bf053c4d129a /include/net
parent	ff14dafde15c11403fac61367a34fea08926e9ee (diff)
parent	2ca45e57ea027fffe3350ae5e21ad9cecb0dce74 (diff)
download	linux-f4b369c6fe0ceaba2da2daff8c9eb415f85926dd.tar.xz
Merge branch 'next' into for-linus
Prepare input updates for 7.1 merge window.
Diffstat (limited to 'include/net')
-rw-r--r--  include/net/9p/client.h                       |  98
-rw-r--r--  include/net/9p/transport.h                    |  15
-rw-r--r--  include/net/act_api.h                         |   3
-rw-r--r--  include/net/addrconf.h                        |   8
-rw-r--r--  include/net/af_vsock.h                        |  70
-rw-r--r--  include/net/ax25.h                            |  10
-rw-r--r--  include/net/bluetooth/bluetooth.h             |  47
-rw-r--r--  include/net/bluetooth/hci.h                   | 413
-rw-r--r--  include/net/bluetooth/hci_core.h              |  32
-rw-r--r--  include/net/bluetooth/hci_sync.h              |   6
-rw-r--r--  include/net/bluetooth/l2cap.h                 |  11
-rw-r--r--  include/net/bluetooth/mgmt.h                  |   2
-rw-r--r--  include/net/bonding.h                         |  16
-rw-r--r--  include/net/can.h                             |  28
-rw-r--r--  include/net/cfg80211.h                        | 174
-rw-r--r--  include/net/devlink.h                         |  49
-rw-r--r--  include/net/dropreason-core.h                 |   6
-rw-r--r--  include/net/dsa.h                             |  34
-rw-r--r--  include/net/dst.h                             |   6
-rw-r--r--  include/net/flow_offload.h                    |  34
-rw-r--r--  include/net/fq_impl.h                         |   2
-rw-r--r--  include/net/gro.h                             |  32
-rw-r--r--  include/net/hotdata.h                         |   1
-rw-r--r--  include/net/ieee80211_radiotap.h              |  20
-rw-r--r--  include/net/inet6_connection_sock.h           |   4
-rw-r--r--  include/net/inet6_hashtables.h                |   2
-rw-r--r--  include/net/inet_common.h                     |  13
-rw-r--r--  include/net/inet_connection_sock.h            |  35
-rw-r--r--  include/net/inet_ecn.h                        |  20
-rw-r--r--  include/net/inet_frag.h                       |  18
-rw-r--r--  include/net/inet_hashtables.h                 |   2
-rw-r--r--  include/net/inet_sock.h                       |  33
-rw-r--r--  include/net/ioam6.h                           |   2
-rw-r--r--  include/net/ip.h                              |   6
-rw-r--r--  include/net/ip6_route.h                       |   6
-rw-r--r--  include/net/ip6_tunnel.h                      |  14
-rw-r--r--  include/net/ip_fib.h                          |   2
-rw-r--r--  include/net/ip_tunnels.h                      |  50
-rw-r--r--  include/net/ipv6.h                            | 166
-rw-r--r--  include/net/ipv6_frag.h                       |   9
-rw-r--r--  include/net/ipv6_stubs.h                      |   2
-rw-r--r--  include/net/iucv/iucv.h                       | 209
-rw-r--r--  include/net/l3mdev.h                          |   7
-rw-r--r--  include/net/libeth/xsk.h                      |   3
-rw-r--r--  include/net/mac80211.h                        | 148
-rw-r--r--  include/net/mana/gdma.h                       |  76
-rw-r--r--  include/net/mana/hw_channel.h                 |   2
-rw-r--r--  include/net/mana/mana.h                       |  25
-rw-r--r--  include/net/neighbour.h                       |  17
-rw-r--r--  include/net/net_namespace.h                   |   5
-rw-r--r--  include/net/netdev_queues.h                   |  73
-rw-r--r--  include/net/netdev_rx_queue.h                 |   2
-rw-r--r--  include/net/netfilter/nf_conntrack.h          |   1
-rw-r--r--  include/net/netfilter/nf_conntrack_count.h    |  16
-rw-r--r--  include/net/netfilter/nf_conntrack_l4proto.h  |   2
-rw-r--r--  include/net/netfilter/nf_conntrack_tuple.h    |   2
-rw-r--r--  include/net/netfilter/nf_flow_table.h         |  26
-rw-r--r--  include/net/netfilter/nf_queue.h              |   4
-rw-r--r--  include/net/netfilter/nf_tables.h             |  46
-rw-r--r--  include/net/netfilter/nf_tables_ipv6.h        |   4
-rw-r--r--  include/net/netmem.h                          | 102
-rw-r--r--  include/net/netns/core.h                      |   2
-rw-r--r--  include/net/netns/ipv4.h                      |  14
-rw-r--r--  include/net/netns/ipv6.h                      |  11
-rw-r--r--  include/net/netns/mpls.h                      |   1
-rw-r--r--  include/net/netns/smc.h                       |   5
-rw-r--r--  include/net/netns/vsock.h                     |  24
-rw-r--r--  include/net/nfc/nfc.h                         |   2
-rw-r--r--  include/net/nl802154.h                        |   5
-rw-r--r--  include/net/page_pool/types.h                 |   3
-rw-r--r--  include/net/phy/realtek_phy.h                 |   7
-rw-r--r--  include/net/ping.h                            |   2
-rw-r--r--  include/net/pkt_sched.h                       |  35
-rw-r--r--  include/net/proto_memory.h                    |   3
-rw-r--r--  include/net/psp/types.h                       |  32
-rw-r--r--  include/net/request_sock.h                    |  12
-rw-r--r--  include/net/sch_generic.h                     | 165
-rw-r--r--  include/net/sch_priv.h                        |  27
-rw-r--r--  include/net/sctp/auth.h                       |   1
-rw-r--r--  include/net/sctp/sctp.h                       |   5
-rw-r--r--  include/net/sctp/stream_sched.h               |   4
-rw-r--r--  include/net/sctp/structs.h                    |   9
-rw-r--r--  include/net/secure_seq.h                      |  45
-rw-r--r--  include/net/selftests.h                       |  45
-rw-r--r--  include/net/smc.h                             |  53
-rw-r--r--  include/net/sock.h                            | 118
-rw-r--r--  include/net/tc_act/tc_gate.h                  |  35
-rw-r--r--  include/net/tc_act/tc_ife.h                   |   4
-rw-r--r--  include/net/tcp.h                             | 155
-rw-r--r--  include/net/tcp_ecn.h                         | 103
-rw-r--r--  include/net/tls.h                             |   3
-rw-r--r--  include/net/udp.h                             |  13
-rw-r--r--  include/net/udp_tunnel.h                      |  34
-rw-r--r--  include/net/vsock_addr.h                      |   2
-rw-r--r--  include/net/xdp_sock.h                        |   7
-rw-r--r--  include/net/xdp_sock_drv.h                    |  20
-rw-r--r--  include/net/xfrm.h                            |  10
-rw-r--r--  include/net/xsk_buff_pool.h                   |   8
98 files changed, 2445 insertions, 850 deletions
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 4f785098c67a..838a94218b59 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -16,6 +16,12 @@
/* Number of requests per row */
#define P9_ROW_MAXTAG 255
+/* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ +
+ * room for write (16 extra) or read (11 extra) operands.
+ */
+
+#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ)
+
/** enum p9_proto_versions - 9P protocol versions
* @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u
* @p9_proto_2000u: 9P2000.u extension
@@ -127,6 +133,96 @@ struct p9_client {
};
/**
+ * struct p9_client_opts - holds client options during parsing
+ * @msize: maximum data size negotiated by protocol
+ * @proto_version: 9P protocol version to use
+ * @trans_mod: module API instantiated with this client
+ *
+ * These parsed options get transferred into client in
+ * apply_client_options()
+ */
+struct p9_client_opts {
+ unsigned int msize;
+ unsigned char proto_version;
+ struct p9_trans_module *trans_mod;
+};
+
+/**
+ * struct p9_fd_opts - per-transport options for fd transport
+ * @rfd: file descriptor for reading (trans=fd)
+ * @wfd: file descriptor for writing (trans=fd)
+ * @port: port to connect to (trans=tcp)
+ * @privport: port is privileged
+ */
+struct p9_fd_opts {
+ int rfd;
+ int wfd;
+ u16 port;
+ bool privport;
+};
+
+/**
+ * struct p9_rdma_opts - Collection of mount options for rdma transport
+ * @port: port of connection
+ * @privport: Whether a privileged port may be used
+ * @sq_depth: The requested depth of the SQ. This really doesn't need
+ * to be any deeper than the number of threads used in the client
+ * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
+ * @timeout: Time to wait in msecs for CM events
+ */
+struct p9_rdma_opts {
+ short port;
+ bool privport;
+ int sq_depth;
+ int rq_depth;
+ long timeout;
+};
+
+/**
+ * struct p9_session_opts - holds parsed options for v9fs_session_info
+ * @flags: session options of type &p9_session_flags
+ * @nodev: set to 1 to disable device mapping
+ * @debug: debug level
+ * @afid: authentication handle
+ * @cache: cache mode of type &p9_cache_bits
+ * @cachetag: the tag of the cache associated with this session
+ * @uname: string user name to mount hierarchy as
+ * @aname: mount specifier for remote hierarchy
+ * @dfltuid: default numeric userid to mount hierarchy as
+ * @dfltgid: default numeric groupid to mount hierarchy as
+ * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
+ * @session_lock_timeout: retry interval for blocking locks
+ *
+ * This structure holds options which are parsed and will be transferred
+ * to the v9fs_session_info structure when mounted, and therefore largely
+ * duplicates struct v9fs_session_info.
+ */
+struct p9_session_opts {
+ unsigned int flags;
+ unsigned char nodev;
+ unsigned short debug;
+ unsigned int afid;
+ unsigned int cache;
+#ifdef CONFIG_9P_FSCACHE
+ char *cachetag;
+#endif
+ char *uname;
+ char *aname;
+ kuid_t dfltuid;
+ kgid_t dfltgid;
+ kuid_t uid;
+ long session_lock_timeout;
+};
+
+/* Used by mount API to store parsed mount options */
+struct v9fs_context {
+ struct p9_client_opts client_opts;
+ struct p9_fd_opts fd_opts;
+ struct p9_rdma_opts rdma_opts;
+ struct p9_session_opts session_opts;
+};
+
+/**
* struct p9_fid - file system entity handle
* @clnt: back pointer to instantiating &p9_client
* @fid: numeric identifier for this handle
@@ -183,7 +279,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid,
const char *name);
int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
struct p9_fid *newdirfid, const char *new_name);
-struct p9_client *p9_client_create(const char *dev_name, char *options);
+struct p9_client *p9_client_create(struct fs_context *fc);
void p9_client_destroy(struct p9_client *clnt);
void p9_client_disconnect(struct p9_client *clnt);
void p9_client_begin_disconnect(struct p9_client *clnt);
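
Note on the mount-API conversion above: p9_client_create() now takes the fs_context directly, and parsed options travel in struct v9fs_context rather than a raw options string. A minimal caller sketch, assuming the parsed context has been stashed in fc->fs_private by the parameter parser (the function name below is hypothetical, not the actual 9p code):

	/* Hypothetical sketch -- only p9_client_create() is from this header. */
	static int v9fs_example_get_tree(struct fs_context *fc)
	{
		struct v9fs_context *ctx = fc->fs_private;	/* assumption */
		struct p9_client *clnt = p9_client_create(fc);

		if (IS_ERR(clnt))
			return PTR_ERR(clnt);
		/* session options remain reachable after client creation */
		pr_debug("9p: msize %u\n", ctx->client_opts.msize);
		return 0;
	}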
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 766ec07c9599..a912bbaa862f 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -14,6 +14,13 @@
#define P9_DEF_MIN_RESVPORT (665U)
#define P9_DEF_MAX_RESVPORT (1023U)
+#define P9_FD_PORT 564
+
+#define P9_RDMA_PORT 5640
+#define P9_RDMA_SQ_DEPTH 32
+#define P9_RDMA_RQ_DEPTH 32
+#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
+
/**
* struct p9_trans_module - transport module interface
* @list: used to maintain a list of currently available transports
@@ -24,6 +31,9 @@
* we're less flexible when choosing the response message
* size in this case
* @def: set if this transport should be considered the default
+ * @supports_vmalloc: set if this transport can work with vmalloc'd buffers
+ * (non-physically contiguous memory). Transports requiring
+ * DMA should leave this as false.
* @create: member function to create a new connection on this transport
* @close: member function to discard a connection on this transport
* @request: member function to issue a request to the transport
@@ -43,10 +53,11 @@ struct p9_trans_module {
char *name; /* name of transport */
int maxsize; /* max message size of transport */
bool pooled_rbuffers;
- int def; /* this transport should be default */
+ bool def; /* this transport should be default */
+ bool supports_vmalloc; /* can work with vmalloc'd buffers */
struct module *owner;
int (*create)(struct p9_client *client,
- const char *devname, char *args);
+ struct fs_context *fc);
void (*close)(struct p9_client *client);
int (*request)(struct p9_client *client, struct p9_req_t *req);
int (*cancel)(struct p9_client *client, struct p9_req_t *req);
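
With def now a bool and the new supports_vmalloc flag, a transport registration looks roughly like this (modeled on the shape of net/9p/trans_fd.c; the values shown are illustrative assumptions, not the actual module definition):

	static struct p9_trans_module p9_tcp_trans_example = {
		.name			= "tcp",
		.maxsize		= 64 * 1024,	/* illustrative */
		.def			= true,		/* was int, now bool */
		.supports_vmalloc	= true,		/* no DMA: vmalloc'd buffers OK */
		.create			= p9_fd_create,	/* now takes struct fs_context * */
		.owner			= THIS_MODULE,
	};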
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 91a24b5e0b93..d11b79107930 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -70,6 +70,7 @@ struct tc_action {
#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
#define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4))
+#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5))
/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
@@ -159,7 +160,7 @@ int tc_action_net_init(struct net *net, struct tc_action_net *tn,
{
int err = 0;
- tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL);
+ tn->idrinfo = kmalloc_obj(*tn->idrinfo);
if (!tn->idrinfo)
return -ENOMEM;
tn->ops = ops;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9e5e95988b9e..9e96776945e5 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -8,7 +8,8 @@
#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
-#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */
+/* TEMP_VALID_LIFETIME default value as specified in RFC 8981 3.8 */
+#define TEMP_VALID_LIFETIME (2*86400) /* 2 days */
#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */
#define REGEN_MIN_ADVANCE (2) /* 2 seconds */
#define REGEN_MAX_RETRY (3)
@@ -347,6 +348,11 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
return rcu_dereference_rtnl(dev->ip6_ptr);
}
+static inline struct inet6_dev *in6_dev_rcu(const struct net_device *dev)
+{
+ return rcu_dereference(dev->ip6_ptr);
+}
+
static inline struct inet6_dev *__in6_dev_get_rtnl_net(const struct net_device *dev)
{
return rtnl_net_dereference(dev_net(dev), dev->ip6_ptr);
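
in6_dev_rcu() above is the plain-RCU counterpart of __in6_dev_get() (which also accepts RTNL protection). A minimal usage sketch:

	static void example_check_inet6(struct net_device *dev)
	{
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = in6_dev_rcu(dev);	/* caller must hold rcu_read_lock() */
		if (idev)
			pr_debug("%s: inet6_dev present\n", dev->name);
		rcu_read_unlock();
	}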
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index d40e978126e3..533d8e75f7bb 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/workqueue.h>
+#include <net/netns/vsock.h>
#include <net/sock.h>
#include <uapi/linux/vm_sockets.h>
@@ -124,7 +125,7 @@ struct vsock_transport {
size_t len, int flags);
int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
struct msghdr *, size_t len);
- bool (*dgram_allow)(u32 cid, u32 port);
+ bool (*dgram_allow)(struct vsock_sock *vsk, u32 cid, u32 port);
/* STREAM. */
/* TODO: stream_bind() */
@@ -136,14 +137,14 @@ struct vsock_transport {
s64 (*stream_has_space)(struct vsock_sock *);
u64 (*stream_rcvhiwat)(struct vsock_sock *);
bool (*stream_is_active)(struct vsock_sock *);
- bool (*stream_allow)(u32 cid, u32 port);
+ bool (*stream_allow)(struct vsock_sock *vsk, u32 cid, u32 port);
/* SEQ_PACKET. */
ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
int flags);
int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
size_t len);
- bool (*seqpacket_allow)(u32 remote_cid);
+ bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid);
u32 (*seqpacket_has_data)(struct vsock_sock *vsk);
/* Notification. */
@@ -216,6 +217,11 @@ void vsock_remove_connected(struct vsock_sock *vsk);
struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
struct sockaddr_vm *dst);
+struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr,
+ struct net *net);
+struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src,
+ struct sockaddr_vm *dst,
+ struct net *net);
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(struct vsock_transport *transport,
void (*fn)(struct sock *sk));
@@ -256,4 +262,62 @@ static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
{
return t->msgzerocopy_allow && t->msgzerocopy_allow();
}
+
+static inline enum vsock_net_mode vsock_net_mode(struct net *net)
+{
+ if (!net)
+ return VSOCK_NET_MODE_GLOBAL;
+
+ return READ_ONCE(net->vsock.mode);
+}
+
+static inline bool vsock_net_mode_global(struct vsock_sock *vsk)
+{
+ return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL;
+}
+
+static inline bool vsock_net_set_child_mode(struct net *net,
+ enum vsock_net_mode mode)
+{
+ int new_locked = mode + 1;
+ int old_locked = 0; /* unlocked */
+
+ if (try_cmpxchg(&net->vsock.child_ns_mode_locked,
+ &old_locked, new_locked)) {
+ WRITE_ONCE(net->vsock.child_ns_mode, mode);
+ return true;
+ }
+
+ return old_locked == new_locked;
+}
+
+static inline enum vsock_net_mode vsock_net_child_mode(struct net *net)
+{
+ return READ_ONCE(net->vsock.child_ns_mode);
+}
+
+/* Return true if two namespaces pass the mode rules. Otherwise, return false.
+ *
+ * A NULL namespace is treated as VSOCK_NET_MODE_GLOBAL.
+ *
+ * Read more about modes in the comment header of net/vmw_vsock/af_vsock.c.
+ */
+static inline bool vsock_net_check_mode(struct net *ns0, struct net *ns1)
+{
+ enum vsock_net_mode mode0, mode1;
+
+ /* Any vsocks within the same network namespace are always reachable,
+ * regardless of the mode.
+ */
+ if (net_eq(ns0, ns1))
+ return true;
+
+ mode0 = vsock_net_mode(ns0);
+ mode1 = vsock_net_mode(ns1);
+
+ /* Different namespaces are only reachable if they are both
+ * global mode.
+ */
+ return mode0 == VSOCK_NET_MODE_GLOBAL && mode0 == mode1;
+}
#endif /* __AF_VSOCK_H__ */
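
The namespace-mode helpers above gate cross-namespace reachability: sockets in the same namespace always see each other, and otherwise both namespaces must be in global mode. A hedged sketch of a caller (hypothetical function; the real checks live in net/vmw_vsock/af_vsock.c):

	static bool example_vsock_reachable(struct vsock_sock *a,
					    struct vsock_sock *b)
	{
		return vsock_net_check_mode(sock_net(sk_vsock(a)),
					    sock_net(sk_vsock(b)));
	}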
diff --git a/include/net/ax25.h b/include/net/ax25.h
index a7bba42dde15..9fc6a6657266 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -116,10 +116,6 @@ enum {
AX25_PROTO_STD_DUPLEX,
#ifdef CONFIG_AX25_DAMA_SLAVE
AX25_PROTO_DAMA_SLAVE,
-#ifdef CONFIG_AX25_DAMA_MASTER
- AX25_PROTO_DAMA_MASTER,
-#define AX25_PROTO_MAX AX25_PROTO_DAMA_MASTER
-#endif
#endif
__AX25_PROTO_MAX,
AX25_PROTO_MAX = __AX25_PROTO_MAX -1
@@ -138,7 +134,7 @@ enum {
AX25_VALUES_IDLE, /* Connected mode idle timer */
AX25_VALUES_N2, /* Default N2 value */
AX25_VALUES_PACLEN, /* AX.25 MTU */
- AX25_VALUES_PROTOCOL, /* Std AX.25, DAMA Slave, DAMA Master */
+ AX25_VALUES_PROTOCOL, /* Std AX.25, DAMA Slave */
#ifdef CONFIG_AX25_DAMA_SLAVE
AX25_VALUES_DS_TIMEOUT, /* DAMA Slave timeout */
#endif
@@ -215,8 +211,6 @@ typedef struct {
unsigned short slave_timeout; /* when? */
} ax25_dama_info;
-struct ctl_table;
-
typedef struct ax25_dev {
struct list_head list;
@@ -226,7 +220,7 @@ typedef struct ax25_dev {
struct net_device *forward;
struct ctl_table_header *sysheader;
int values[AX25_MAX_VALUES];
-#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER)
+#ifdef CONFIG_AX25_DAMA_SLAVE
ax25_dama_info dama;
#endif
refcount_t refcount;
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index d46ed9011ee5..69eed69f7f26 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -130,21 +130,30 @@ struct bt_voice {
#define BT_RCVMTU 13
#define BT_PHY 14
-#define BT_PHY_BR_1M_1SLOT 0x00000001
-#define BT_PHY_BR_1M_3SLOT 0x00000002
-#define BT_PHY_BR_1M_5SLOT 0x00000004
-#define BT_PHY_EDR_2M_1SLOT 0x00000008
-#define BT_PHY_EDR_2M_3SLOT 0x00000010
-#define BT_PHY_EDR_2M_5SLOT 0x00000020
-#define BT_PHY_EDR_3M_1SLOT 0x00000040
-#define BT_PHY_EDR_3M_3SLOT 0x00000080
-#define BT_PHY_EDR_3M_5SLOT 0x00000100
-#define BT_PHY_LE_1M_TX 0x00000200
-#define BT_PHY_LE_1M_RX 0x00000400
-#define BT_PHY_LE_2M_TX 0x00000800
-#define BT_PHY_LE_2M_RX 0x00001000
-#define BT_PHY_LE_CODED_TX 0x00002000
-#define BT_PHY_LE_CODED_RX 0x00004000
+#define BT_PHY_BR_1M_1SLOT BIT(0)
+#define BT_PHY_BR_1M_3SLOT BIT(1)
+#define BT_PHY_BR_1M_5SLOT BIT(2)
+#define BT_PHY_EDR_2M_1SLOT BIT(3)
+#define BT_PHY_EDR_2M_3SLOT BIT(4)
+#define BT_PHY_EDR_2M_5SLOT BIT(5)
+#define BT_PHY_EDR_3M_1SLOT BIT(6)
+#define BT_PHY_EDR_3M_3SLOT BIT(7)
+#define BT_PHY_EDR_3M_5SLOT BIT(8)
+#define BT_PHY_LE_1M_TX BIT(9)
+#define BT_PHY_LE_1M_RX BIT(10)
+#define BT_PHY_LE_2M_TX BIT(11)
+#define BT_PHY_LE_2M_RX BIT(12)
+#define BT_PHY_LE_CODED_TX BIT(13)
+#define BT_PHY_LE_CODED_RX BIT(14)
+
+#define BT_PHY_BREDR_MASK (BT_PHY_BR_1M_1SLOT | BT_PHY_BR_1M_3SLOT | \
+ BT_PHY_BR_1M_5SLOT | BT_PHY_EDR_2M_1SLOT | \
+ BT_PHY_EDR_2M_3SLOT | BT_PHY_EDR_2M_5SLOT | \
+ BT_PHY_EDR_3M_1SLOT | BT_PHY_EDR_3M_3SLOT | \
+ BT_PHY_EDR_3M_5SLOT)
+#define BT_PHY_LE_MASK (BT_PHY_LE_1M_TX | BT_PHY_LE_1M_RX | \
+ BT_PHY_LE_2M_TX | BT_PHY_LE_2M_RX | \
+ BT_PHY_LE_CODED_TX | BT_PHY_LE_CODED_RX)
#define BT_MODE 15
@@ -173,7 +182,7 @@ struct bt_iso_io_qos {
__u32 interval;
__u16 latency;
__u16 sdu;
- __u8 phy;
+ __u8 phys;
__u8 rtn;
};
@@ -212,9 +221,9 @@ struct bt_iso_qos {
};
};
-#define BT_ISO_PHY_1M 0x01
-#define BT_ISO_PHY_2M 0x02
-#define BT_ISO_PHY_CODED 0x04
+#define BT_ISO_PHY_1M BIT(0)
+#define BT_ISO_PHY_2M BIT(1)
+#define BT_ISO_PHY_CODED BIT(2)
#define BT_ISO_PHY_ANY (BT_ISO_PHY_1M | BT_ISO_PHY_2M | \
BT_ISO_PHY_CODED)
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index cb4c02d00759..89ad9470fa71 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -647,10 +647,15 @@ enum {
#define HCI_LE_EXT_ADV 0x10
#define HCI_LE_PERIODIC_ADV 0x20
#define HCI_LE_CHAN_SEL_ALG2 0x40
+#define HCI_LE_PAST_SENDER 0x01
+#define HCI_LE_PAST_RECEIVER 0x02
#define HCI_LE_CIS_CENTRAL 0x10
#define HCI_LE_CIS_PERIPHERAL 0x20
#define HCI_LE_ISO_BROADCASTER 0x40
#define HCI_LE_ISO_SYNC_RECEIVER 0x80
+#define HCI_LE_LL_EXT_FEATURE 0x80
+#define HCI_LE_CS 0x40
+#define HCI_LE_CS_HOST 0x80
/* Connection modes */
#define HCI_CM_ACTIVE 0x0000
@@ -1880,6 +1885,15 @@ struct hci_cp_le_set_default_phy {
#define HCI_LE_SET_PHY_2M 0x02
#define HCI_LE_SET_PHY_CODED 0x04
+#define HCI_OP_LE_SET_PHY 0x2032
+struct hci_cp_le_set_phy {
+ __le16 handle;
+ __u8 all_phys;
+ __u8 tx_phys;
+ __u8 rx_phys;
+ __le16 phy_opts;
+} __packed;
+
#define HCI_OP_LE_SET_EXT_SCAN_PARAMS 0x2041
struct hci_cp_le_set_ext_scan_params {
__u8 own_addr_type;
@@ -2068,6 +2082,44 @@ struct hci_cp_le_set_privacy_mode {
__u8 mode;
} __packed;
+#define HCI_OP_LE_PAST 0x205a
+struct hci_cp_le_past {
+ __le16 handle;
+ __le16 service_data;
+ __le16 sync_handle;
+} __packed;
+
+struct hci_rp_le_past {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_PAST_SET_INFO 0x205b
+struct hci_cp_le_past_set_info {
+ __le16 handle;
+ __le16 service_data;
+ __u8 adv_handle;
+} __packed;
+
+struct hci_rp_le_past_set_info {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_PAST_PARAMS 0x205c
+struct hci_cp_le_past_params {
+ __le16 handle;
+ __u8 mode;
+ __le16 skip;
+ __le16 sync_timeout;
+ __u8 cte_type;
+} __packed;
+
+struct hci_rp_le_past_params {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
#define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060
struct hci_rp_le_read_buffer_size_v2 {
__u8 status;
@@ -2095,8 +2147,8 @@ struct hci_cis_params {
__u8 cis_id;
__le16 c_sdu;
__le16 p_sdu;
- __u8 c_phy;
- __u8 p_phy;
+ __u8 c_phys;
+ __u8 p_phys;
__u8 c_rtn;
__u8 p_rtn;
} __packed;
@@ -2215,6 +2267,217 @@ struct hci_cp_le_set_host_feature {
__u8 bit_value;
} __packed;
+#define HCI_OP_LE_READ_ALL_LOCAL_FEATURES 0x2087
+struct hci_rp_le_read_all_local_features {
+ __u8 status;
+ __u8 page;
+ __u8 features[248];
+} __packed;
+
+#define HCI_OP_LE_READ_ALL_REMOTE_FEATURES 0x2088
+struct hci_cp_le_read_all_remote_features {
+ __le16 handle;
+ __u8 pages;
+} __packed;
+
+/* Channel Sounding Commands */
+#define HCI_OP_LE_CS_RD_LOCAL_SUPP_CAP 0x2089
+struct hci_rp_le_cs_rd_local_supp_cap {
+ __u8 status;
+ __u8 num_config_supported;
+ __le16 max_consecutive_procedures_supported;
+ __u8 num_antennas_supported;
+ __u8 max_antenna_paths_supported;
+ __u8 roles_supported;
+ __u8 modes_supported;
+ __u8 rtt_capability;
+ __u8 rtt_aa_only_n;
+ __u8 rtt_sounding_n;
+ __u8 rtt_random_payload_n;
+ __le16 nadm_sounding_capability;
+ __le16 nadm_random_capability;
+ __u8 cs_sync_phys_supported;
+ __le16 subfeatures_supported;
+ __le16 t_ip1_times_supported;
+ __le16 t_ip2_times_supported;
+ __le16 t_fcs_times_supported;
+ __le16 t_pm_times_supported;
+ __u8 t_sw_time_supported;
+ __u8 tx_snr_capability;
+} __packed;
+
+#define HCI_OP_LE_CS_RD_RMT_SUPP_CAP 0x208A
+struct hci_cp_le_cs_rd_local_supp_cap {
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_CS_WR_CACHED_RMT_SUPP_CAP 0x208B
+struct hci_cp_le_cs_wr_cached_rmt_supp_cap {
+ __le16 handle;
+ __u8 num_config_supported;
+ __le16 max_consecutive_procedures_supported;
+ __u8 num_antennas_supported;
+ __u8 max_antenna_paths_supported;
+ __u8 roles_supported;
+ __u8 modes_supported;
+ __u8 rtt_capability;
+ __u8 rtt_aa_only_n;
+ __u8 rtt_sounding_n;
+ __u8 rtt_random_payload_n;
+ __le16 nadm_sounding_capability;
+ __le16 nadm_random_capability;
+ __u8 cs_sync_phys_supported;
+ __le16 subfeatures_supported;
+ __le16 t_ip1_times_supported;
+ __le16 t_ip2_times_supported;
+ __le16 t_fcs_times_supported;
+ __le16 t_pm_times_supported;
+ __u8 t_sw_time_supported;
+ __u8 tx_snr_capability;
+} __packed;
+
+struct hci_rp_le_cs_wr_cached_rmt_supp_cap {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_CS_SEC_ENABLE 0x208C
+struct hci_cp_le_cs_sec_enable {
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_CS_SET_DEFAULT_SETTINGS 0x208D
+struct hci_cp_le_cs_set_default_settings {
+ __le16 handle;
+ __u8 role_enable;
+ __u8 cs_sync_ant_sel;
+ __s8 max_tx_power;
+} __packed;
+
+struct hci_rp_le_cs_set_default_settings {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_CS_RD_RMT_FAE_TABLE 0x208E
+struct hci_cp_le_cs_rd_rmt_fae_table {
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_CS_WR_CACHED_RMT_FAE_TABLE 0x208F
+struct hci_cp_le_cs_wr_rmt_cached_fae_table {
+ __le16 handle;
+ __u8 remote_fae_table[72];
+} __packed;
+
+struct hci_rp_le_cs_wr_rmt_cached_fae_table {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_CS_CREATE_CONFIG 0x2090
+struct hci_cp_le_cs_create_config {
+ __le16 handle;
+ __u8 config_id;
+ __u8 create_context;
+ __u8 main_mode_type;
+ __u8 sub_mode_type;
+ __u8 min_main_mode_steps;
+ __u8 max_main_mode_steps;
+ __u8 main_mode_repetition;
+ __u8 mode_0_steps;
+ __u8 role;
+ __u8 rtt_type;
+ __u8 cs_sync_phy;
+ __u8 channel_map[10];
+ __u8 channel_map_repetition;
+ __u8 channel_selection_type;
+ __u8 ch3c_shape;
+ __u8 ch3c_jump;
+ __u8 reserved;
+} __packed;
+
+#define HCI_OP_LE_CS_REMOVE_CONFIG 0x2091
+struct hci_cp_le_cs_remove_config {
+ __le16 handle;
+ __u8 config_id;
+} __packed;
+
+#define HCI_OP_LE_CS_SET_CH_CLASSIFICATION 0x2092
+struct hci_cp_le_cs_set_ch_classification {
+ __u8 ch_classification[10];
+} __packed;
+
+struct hci_rp_le_cs_set_ch_classification {
+ __u8 status;
+} __packed;
+
+#define HCI_OP_LE_CS_SET_PROC_PARAM 0x2093
+struct hci_cp_le_cs_set_proc_param {
+ __le16 handle;
+ __u8 config_id;
+ __le16 max_procedure_len;
+ __le16 min_procedure_interval;
+ __le16 max_procedure_interval;
+ __le16 max_procedure_count;
+ __u8 min_subevent_len[3];
+ __u8 max_subevent_len[3];
+ __u8 tone_antenna_config_selection;
+ __u8 phy;
+ __u8 tx_power_delta;
+ __u8 preferred_peer_antenna;
+ __u8 snr_control_initiator;
+ __u8 snr_control_reflector;
+} __packed;
+
+struct hci_rp_le_cs_set_proc_param {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_OP_LE_CS_SET_PROC_ENABLE 0x2094
+struct hci_cp_le_cs_set_proc_enable {
+ __le16 handle;
+ __u8 config_id;
+ __u8 enable;
+} __packed;
+
+#define HCI_OP_LE_CS_TEST 0x2095
+struct hci_cp_le_cs_test {
+ __u8 main_mode_type;
+ __u8 sub_mode_type;
+ __u8 main_mode_repetition;
+ __u8 mode_0_steps;
+ __u8 role;
+ __u8 rtt_type;
+ __u8 cs_sync_phy;
+ __u8 cs_sync_antenna_selection;
+ __u8 subevent_len[3];
+ __le16 subevent_interval;
+ __u8 max_num_subevents;
+ __u8 transmit_power_level;
+ __u8 t_ip1_time;
+ __u8 t_ip2_time;
+ __u8 t_fcs_time;
+ __u8 t_pm_time;
+ __u8 t_sw_time;
+ __u8 tone_antenna_config_selection;
+ __u8 reserved;
+ __u8 snr_control_initiator;
+ __u8 snr_control_reflector;
+ __le16 drbg_nonce;
+ __u8 channel_map_repetition;
+ __le16 override_config;
+ __u8 override_parameters_length;
+ __u8 override_parameters_data[];
+} __packed;
+
+struct hci_rp_le_cs_test {
+ __u8 status;
+} __packed;
+
+#define HCI_OP_LE_CS_TEST_END 0x2096
+
/* ---- HCI Events ---- */
struct hci_ev_status {
__u8 status;
@@ -2800,6 +3063,20 @@ struct hci_evt_le_ext_adv_set_term {
__u8 num_evts;
} __packed;
+#define HCI_EV_LE_PAST_RECEIVED 0x18
+struct hci_ev_le_past_received {
+ __u8 status;
+ __le16 handle;
+ __le16 service_data;
+ __le16 sync_handle;
+ __u8 sid;
+ __u8 bdaddr_type;
+ bdaddr_t bdaddr;
+ __u8 phy;
+ __le16 interval;
+ __u8 clock_accuracy;
+} __packed;
+
#define HCI_EVT_LE_CIS_ESTABLISHED 0x19
struct hci_evt_le_cis_established {
__u8 status;
@@ -2883,6 +3160,138 @@ struct hci_evt_le_big_info_adv_report {
__u8 encryption;
} __packed;
+#define HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE 0x2b
+struct hci_evt_le_read_all_remote_features_complete {
+ __u8 status;
+ __le16 handle;
+ __u8 max_pages;
+ __u8 valid_pages;
+ __u8 features[248];
+} __packed;
+
+/* Channel Sounding Events */
+#define HCI_EVT_LE_CS_READ_RMT_SUPP_CAP_COMPLETE 0x2C
+struct hci_evt_le_cs_read_rmt_supp_cap_complete {
+ __u8 status;
+ __le16 handle;
+ __u8 num_configs_supp;
+ __le16 max_consec_proc_supp;
+ __u8 num_ant_supp;
+ __u8 max_ant_path_supp;
+ __u8 roles_supp;
+ __u8 modes_supp;
+ __u8 rtt_cap;
+ __u8 rtt_aa_only_n;
+ __u8 rtt_sounding_n;
+ __u8 rtt_rand_payload_n;
+ __le16 nadm_sounding_cap;
+ __le16 nadm_rand_cap;
+ __u8 cs_sync_phys_supp;
+ __le16 sub_feat_supp;
+ __le16 t_ip1_times_supp;
+ __le16 t_ip2_times_supp;
+ __le16 t_fcs_times_supp;
+ __le16 t_pm_times_supp;
+ __u8 t_sw_times_supp;
+ __u8 tx_snr_cap;
+} __packed;
+
+#define HCI_EVT_LE_CS_READ_RMT_FAE_TABLE_COMPLETE 0x2D
+struct hci_evt_le_cs_read_rmt_fae_table_complete {
+ __u8 status;
+ __le16 handle;
+ __u8 remote_fae_table[72];
+} __packed;
+
+#define HCI_EVT_LE_CS_SECURITY_ENABLE_COMPLETE 0x2E
+struct hci_evt_le_cs_security_enable_complete {
+ __u8 status;
+ __le16 handle;
+} __packed;
+
+#define HCI_EVT_LE_CS_CONFIG_COMPLETE 0x2F
+struct hci_evt_le_cs_config_complete {
+ __u8 status;
+ __le16 handle;
+ __u8 config_id;
+ __u8 action;
+ __u8 main_mode_type;
+ __u8 sub_mode_type;
+ __u8 min_main_mode_steps;
+ __u8 max_main_mode_steps;
+ __u8 main_mode_rep;
+ __u8 mode_0_steps;
+ __u8 role;
+ __u8 rtt_type;
+ __u8 cs_sync_phy;
+ __u8 channel_map[10];
+ __u8 channel_map_rep;
+ __u8 channel_sel_type;
+ __u8 ch3c_shape;
+ __u8 ch3c_jump;
+ __u8 reserved;
+ __u8 t_ip1_time;
+ __u8 t_ip2_time;
+ __u8 t_fcs_time;
+ __u8 t_pm_time;
+} __packed;
+
+#define HCI_EVT_LE_CS_PROCEDURE_ENABLE_COMPLETE 0x30
+struct hci_evt_le_cs_procedure_enable_complete {
+ __u8 status;
+ __le16 handle;
+ __u8 config_id;
+ __u8 state;
+ __u8 tone_ant_config_sel;
+ __s8 sel_tx_pwr;
+ __u8 sub_evt_len[3];
+ __u8 sub_evts_per_evt;
+ __le16 sub_evt_intrvl;
+ __le16 evt_intrvl;
+ __le16 proc_intrvl;
+ __le16 proc_counter;
+ __le16 max_proc_len;
+} __packed;
+
+#define HCI_EVT_LE_CS_SUBEVENT_RESULT 0x31
+struct hci_evt_le_cs_subevent_result {
+ __le16 handle;
+ __u8 config_id;
+ __le16 start_acl_conn_evt_counter;
+ __le16 proc_counter;
+ __le16 freq_comp;
+ __u8 ref_pwr_lvl;
+ __u8 proc_done_status;
+ __u8 subevt_done_status;
+ __u8 abort_reason;
+ __u8 num_ant_paths;
+ __u8 num_steps_reported;
+ __u8 step_mode[0]; /* depends on num_steps_reported */
+ __u8 step_channel[0]; /* depends on num_steps_reported */
+ __u8 step_data_length[0]; /* depends on num_steps_reported */
+ __u8 step_data[0]; /* depends on num_steps_reported */
+} __packed;
+
+#define HCI_EVT_LE_CS_SUBEVENT_RESULT_CONTINUE 0x32
+struct hci_evt_le_cs_subevent_result_continue {
+ __le16 handle;
+ __u8 config_id;
+ __u8 proc_done_status;
+ __u8 subevt_done_status;
+ __u8 abort_reason;
+ __u8 num_ant_paths;
+ __u8 num_steps_reported;
+ __u8 step_mode[0]; /* depends on num_steps_reported */
+ __u8 step_channel[0]; /* depends on num_steps_reported */
+ __u8 step_data_length[0]; /* depends on num_steps_reported */
+ __u8 step_data[0]; /* depends on num_steps_reported */
+} __packed;
+
+#define HCI_EVT_LE_CS_TEST_END_COMPLETE 0x33
+struct hci_evt_le_cs_test_end_complete {
+ __u8 status;
+} __packed;
+
#define HCI_EV_VENDOR 0xff
/* Internal events generated by Bluetooth stack */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 0cb87687837f..a7bffb908c1e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -166,6 +166,7 @@ enum hci_conn_flags {
HCI_CONN_FLAG_REMOTE_WAKEUP = BIT(0),
HCI_CONN_FLAG_DEVICE_PRIVACY = BIT(1),
HCI_CONN_FLAG_ADDRESS_RESOLUTION = BIT(2),
+ HCI_CONN_FLAG_PAST = BIT(3),
};
typedef u8 hci_conn_flags_t;
@@ -377,7 +378,7 @@ struct hci_dev {
__u8 minor_class;
__u8 max_page;
__u8 features[HCI_MAX_PAGES][8];
- __u8 le_features[8];
+ __u8 le_features[248];
__u8 le_accept_list_size;
__u8 le_resolv_list_size;
__u8 le_num_of_adv_sets;
@@ -701,6 +702,7 @@ struct hci_conn {
__u8 attempt;
__u8 dev_class[3];
__u8 features[HCI_MAX_PAGES][8];
+ __u8 le_features[248];
__u16 pkt_type;
__u16 link_policy;
__u8 key_type;
@@ -728,6 +730,8 @@ struct hci_conn {
__u16 le_per_adv_data_offset;
__u8 le_adv_phy;
__u8 le_adv_sec_phy;
+ __u8 le_tx_def_phys;
+ __u8 le_rx_def_phys;
__u8 le_tx_phy;
__u8 le_rx_phy;
__s8 rssi;
@@ -1570,9 +1574,9 @@ int hci_le_create_cis_pending(struct hci_dev *hdev);
int hci_conn_check_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
- u8 role, u16 handle);
+ u8 dst_type, u8 role, u16 handle);
struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
- bdaddr_t *dst, u8 role);
+ bdaddr_t *dst, u8 dst_type, u8 role);
void hci_conn_del(struct hci_conn *conn);
void hci_conn_hash_flush(struct hci_dev *hdev);
@@ -1601,6 +1605,7 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid,
struct bt_iso_qos *qos,
__u8 base_len, __u8 *base, u16 timeout);
+int hci_past_bis(struct hci_conn *conn, bdaddr_t *dst, __u8 dst_type);
struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos,
u16 timeout);
@@ -2053,6 +2058,26 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define sync_recv_capable(dev) \
((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER)
#define sync_recv_enabled(dev) (le_enabled(dev) && sync_recv_capable(dev))
+#define past_sender_capable(dev) \
+ ((dev)->le_features[3] & HCI_LE_PAST_SENDER)
+#define past_receiver_capable(dev) \
+ ((dev)->le_features[3] & HCI_LE_PAST_RECEIVER)
+#define past_capable(dev) \
+ (past_sender_capable(dev) || past_receiver_capable(dev))
+#define past_sender_enabled(dev) \
+ (le_enabled(dev) && past_sender_capable(dev))
+#define past_receiver_enabled(dev) \
+ (le_enabled(dev) && past_receiver_capable(dev))
+#define past_enabled(dev) \
+ (past_sender_enabled(dev) || past_receiver_enabled(dev))
+#define ll_ext_feature_capable(dev) \
+ ((dev)->le_features[7] & HCI_LE_LL_EXT_FEATURE)
+
+/* Channel sounding support */
+#define le_cs_capable(dev) \
+ ((dev)->le_features[5] & HCI_LE_CS)
+#define le_cs_host_capable(dev) \
+ ((dev)->le_features[5] & HCI_LE_CS_HOST)
#define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \
(!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG)))
@@ -2317,6 +2342,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
void *hci_recv_event_data(struct hci_dev *hdev, __u8 event);
u32 hci_conn_get_phy(struct hci_conn *conn);
+int hci_conn_set_phy(struct hci_conn *conn, u32 phys);
/* ----- HCI Sockets ----- */
void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb);
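
The new hci_conn_set_phy() pairs with the BIT()-based BT_PHY_* flags and masks from bluetooth.h. An illustrative sketch (error handling elided; not the actual stack code):

	/* Ask for LE 2M in both directions on an LE connection. */
	static int example_prefer_2m(struct hci_conn *conn)
	{
		if (!(hci_conn_get_phy(conn) & BT_PHY_LE_MASK))
			return -EOPNOTSUPP;	/* BR/EDR-only link */

		return hci_conn_set_phy(conn, BT_PHY_LE_2M_TX | BT_PHY_LE_2M_RX);
	}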
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index e352a4e0ef8d..73e494b2591d 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -188,3 +188,9 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn,
int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn);
int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_past_sync(struct hci_conn *conn, struct hci_conn *le);
+
+int hci_le_read_remote_features(struct hci_conn *conn);
+
+int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type);
+int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys);
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 00e182a22720..010f1a8fd15f 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -284,9 +284,9 @@ struct l2cap_conn_rsp {
#define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007
#define L2CAP_CR_LE_ENCRYPTION 0x0008
#define L2CAP_CR_LE_INVALID_SCID 0x0009
-#define L2CAP_CR_LE_SCID_IN_USE 0X000A
-#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B
-#define L2CAP_CR_LE_INVALID_PARAMS 0X000C
+#define L2CAP_CR_LE_SCID_IN_USE 0x000A
+#define L2CAP_CR_LE_UNACCEPT_PARAMS 0x000B
+#define L2CAP_CR_LE_INVALID_PARAMS 0x000C
/* connect/create channel status */
#define L2CAP_CS_NO_INFO 0x0000
@@ -493,6 +493,8 @@ struct l2cap_ecred_reconf_req {
#define L2CAP_RECONF_SUCCESS 0x0000
#define L2CAP_RECONF_INVALID_MTU 0x0001
#define L2CAP_RECONF_INVALID_MPS 0x0002
+#define L2CAP_RECONF_INVALID_CID 0x0003
+#define L2CAP_RECONF_INVALID_PARAMS 0x0004
struct l2cap_ecred_reconf_rsp {
__le16 result;
@@ -655,8 +657,7 @@ struct l2cap_conn {
struct sk_buff *rx_skb;
__u32 rx_len;
- __u8 tx_ident;
- struct mutex ident_lock;
+ struct ida tx_ida;
struct sk_buff_head pending_rx;
struct work_struct pending_rx_work;
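
Replacing tx_ident/ident_lock with an IDA means command identifiers can be allocated without a dedicated mutex. A sketch of what allocation might look like (hypothetical helper; L2CAP idents 0x01-0xFF are valid, 0 is reserved):

	static u8 example_get_ident(struct l2cap_conn *conn)
	{
		int id = ida_alloc_range(&conn->tx_ida, 1, 255, GFP_KERNEL);

		return id < 0 ? 0 : id;	/* 0 signals allocation failure */
	}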
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index f5be96f08b9d..8234915854b6 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -119,6 +119,8 @@ struct mgmt_rp_read_index_list {
#define MGMT_SETTING_ISO_BROADCASTER BIT(20)
#define MGMT_SETTING_ISO_SYNC_RECEIVER BIT(21)
#define MGMT_SETTING_LL_PRIVACY BIT(22)
+#define MGMT_SETTING_PAST_SENDER BIT(23)
+#define MGMT_SETTING_PAST_RECEIVER BIT(24)
#define MGMT_OP_READ_INFO 0x0004
#define MGMT_READ_INFO_SIZE 0
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 49edc7da0586..395c6e281c5f 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -254,6 +254,7 @@ struct bonding {
struct delayed_work ad_work;
struct delayed_work mcast_work;
struct delayed_work slave_arr_work;
+ struct delayed_work peer_notify_work;
#ifdef CONFIG_DEBUG_FS
/* debugging support via debugfs */
struct dentry *debug_dir;
@@ -521,13 +522,14 @@ static inline int bond_is_ip6_target_ok(struct in6_addr *addr)
static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond,
struct slave *slave)
{
+ unsigned long tmp, ret = READ_ONCE(slave->target_last_arp_rx[0]);
int i = 1;
- unsigned long ret = slave->target_last_arp_rx[0];
-
- for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++)
- if (time_before(slave->target_last_arp_rx[i], ret))
- ret = slave->target_last_arp_rx[i];
+ for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++) {
+ tmp = READ_ONCE(slave->target_last_arp_rx[i]);
+ if (time_before(tmp, ret))
+ ret = tmp;
+ }
return ret;
}
@@ -537,7 +539,7 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL)
return slave_oldest_target_arp_rx(bond, slave);
- return slave->last_rx;
+ return READ_ONCE(slave->last_rx);
}
static inline void slave_update_last_tx(struct slave *slave)
@@ -697,6 +699,7 @@ void bond_debug_register(struct bonding *bond);
void bond_debug_unregister(struct bonding *bond);
void bond_debug_reregister(struct bonding *bond);
const char *bond_mode_name(int mode);
+bool __bond_xdp_check(int mode, int xmit_policy);
bool bond_xdp_check(struct bonding *bond, int mode);
void bond_setup(struct net_device *bond_dev);
unsigned int bond_get_num_tx_queues(void);
@@ -709,6 +712,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
int level);
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
void bond_work_init_all(struct bonding *bond);
void bond_work_cancel_all(struct bonding *bond);
diff --git a/include/net/can.h b/include/net/can.h
new file mode 100644
index 000000000000..6db9e826f0e0
--- /dev/null
+++ b/include/net/can.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * net/can.h
+ *
+ * Definitions for the CAN network socket buffer extensions
+ *
+ * Copyright (C) 2026 Oliver Hartkopp <socketcan@hartkopp.net>
+ *
+ */
+
+#ifndef _NET_CAN_H
+#define _NET_CAN_H
+
+/**
+ * struct can_skb_ext - skb extensions for CAN specific content
+ * @can_iif: ifindex of the first interface the CAN frame appeared on
+ * @can_framelen: cached echo CAN frame length for bql
+ * @can_gw_hops: can-gw CAN frame time-to-live counter
+ * @can_ext_flags: CAN skb extensions flags
+ */
+struct can_skb_ext {
+ int can_iif;
+ u16 can_framelen;
+ u8 can_gw_hops;
+ u8 can_ext_flags;
+};
+
+#endif /* _NET_CAN_H */
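
struct can_skb_ext is shaped for the generic skb-extension machinery. A hedged sketch of attaching one on reception, assuming an SKB_EXT_CAN extension id is registered for this struct (the id name is an assumption):

	static void example_tag_can_skb(struct sk_buff *skb)
	{
		struct can_skb_ext *ext = skb_ext_add(skb, SKB_EXT_CAN);

		if (!ext)
			return;				/* allocation failed */

		ext->can_iif = skb->dev->ifindex;	/* first RX interface */
		ext->can_gw_hops = 0;			/* fresh frame */
	}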
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 820e299f06b5..fc01de19c798 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2025 Intel Corporation
+ * Copyright (C) 2018-2026 Intel Corporation
*/
#include <linux/ethtool.h>
@@ -126,6 +126,7 @@ struct wiphy;
* @IEEE80211_CHAN_NO_4MHZ: 4 MHz bandwidth is not permitted on this channel.
* @IEEE80211_CHAN_NO_8MHZ: 8 MHz bandwidth is not permitted on this channel.
* @IEEE80211_CHAN_NO_16MHZ: 16 MHz bandwidth is not permitted on this channel.
+ * @IEEE80211_CHAN_NO_UHR: UHR operation is not permitted on this channel.
*/
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = BIT(0),
@@ -143,6 +144,7 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_NO_10MHZ = BIT(12),
IEEE80211_CHAN_NO_HE = BIT(13),
/* can use free bits here */
+ IEEE80211_CHAN_NO_UHR = BIT(18),
IEEE80211_CHAN_NO_320MHZ = BIT(19),
IEEE80211_CHAN_NO_EHT = BIT(20),
IEEE80211_CHAN_DFS_CONCURRENT = BIT(21),
@@ -429,6 +431,18 @@ struct ieee80211_sta_eht_cap {
u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN];
};
+/**
+ * struct ieee80211_sta_uhr_cap - STA's UHR capabilities
+ * @has_uhr: true iff UHR is supported and data is valid
+ * @mac: fixed MAC capabilities
+ * @phy: fixed PHY capabilities
+ */
+struct ieee80211_sta_uhr_cap {
+ bool has_uhr;
+ struct ieee80211_uhr_cap_mac mac;
+ struct ieee80211_uhr_cap_phy phy;
+};
+
/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */
#ifdef __CHECKER__
/*
@@ -454,6 +468,7 @@ struct ieee80211_sta_eht_cap {
* @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a
* 6 GHz band channel (and 0 may be valid value).
* @eht_cap: STA's EHT capabilities
+ * @uhr_cap: STA's UHR capabilities
* @vendor_elems: vendor element(s) to advertise
* @vendor_elems.data: vendor element(s) data
* @vendor_elems.len: vendor element(s) length
@@ -463,6 +478,7 @@ struct ieee80211_sband_iftype_data {
struct ieee80211_sta_he_cap he_cap;
struct ieee80211_he_6ghz_capa he_6ghz_capa;
struct ieee80211_sta_eht_cap eht_cap;
+ struct ieee80211_sta_uhr_cap uhr_cap;
struct {
const u8 *data;
unsigned int len;
@@ -685,7 +701,7 @@ ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband,
}
/**
- * ieee80211_get_eht_iftype_cap - return ETH capabilities for an sband's iftype
+ * ieee80211_get_eht_iftype_cap - return EHT capabilities for an sband's iftype
* @sband: the sband to search for the iftype on
* @iftype: enum nl80211_iftype
*
@@ -705,6 +721,26 @@ ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband,
}
/**
+ * ieee80211_get_uhr_iftype_cap - return UHR capabilities for an sband's iftype
+ * @sband: the sband to search for the iftype on
+ * @iftype: enum nl80211_iftype
+ *
+ * Return: pointer to the struct ieee80211_sta_uhr_cap, or NULL if none found
+ */
+static inline const struct ieee80211_sta_uhr_cap *
+ieee80211_get_uhr_iftype_cap(const struct ieee80211_supported_band *sband,
+ enum nl80211_iftype iftype)
+{
+ const struct ieee80211_sband_iftype_data *data =
+ ieee80211_get_sband_iftype_data(sband, iftype);
+
+ if (data && data->uhr_cap.has_uhr)
+ return &data->uhr_cap;
+
+ return NULL;
+}
+
+/**
* wiphy_read_of_freq_limits - read frequency limits from device tree
*
* @wiphy: the wireless device to get extra limits for
@@ -786,8 +822,7 @@ struct vif_params {
* @key: key material
* @key_len: length of key material
* @cipher: cipher suite selector
- * @seq: sequence counter (IV/PN) for TKIP and CCMP keys, only used
- * with the get_key() callback, must be in little endian,
+ * @seq: sequence counter (IV/PN), must be in little endian,
* length given by @seq_len.
* @seq_len: length of @seq.
* @vlan_id: vlan_id for VLAN group key (if nonzero)
@@ -975,7 +1010,8 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1,
chandef1->center_freq1 == chandef2->center_freq1 &&
chandef1->freq1_offset == chandef2->freq1_offset &&
chandef1->center_freq2 == chandef2->center_freq2 &&
- chandef1->punctured == chandef2->punctured);
+ chandef1->punctured == chandef2->punctured &&
+ chandef1->s1g_primary_2mhz == chandef2->s1g_primary_2mhz);
}
/**
@@ -1015,6 +1051,7 @@ const struct cfg80211_chan_def *
cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
const struct cfg80211_chan_def *chandef2);
+
/**
* nl80211_chan_width_to_mhz - get the channel width in MHz
* @chan_width: the channel width from &enum nl80211_chan_width
@@ -1485,6 +1522,7 @@ struct cfg80211_s1g_short_beacon {
* @he_cap: HE capabilities (or %NULL if HE isn't enabled)
* @eht_cap: EHT capabilities (or %NULL if EHT isn't enabled)
* @eht_oper: EHT operation IE (or %NULL if EHT isn't enabled)
+ * @uhr_oper: UHR operation (or %NULL if UHR isn't enabled)
* @ht_required: stations must support HT
* @vht_required: stations must support VHT
* @twt_responder: Enable Target Wait Time
@@ -1524,6 +1562,7 @@ struct cfg80211_ap_settings {
const struct ieee80211_he_operation *he_oper;
const struct ieee80211_eht_cap_elem *eht_cap;
const struct ieee80211_eht_operation *eht_oper;
+ const struct ieee80211_uhr_operation *uhr_oper;
bool ht_required, vht_required, he_required, sae_h2e_required;
bool twt_responder;
u32 flags;
@@ -1697,6 +1736,8 @@ struct sta_txpwr {
* @eht_capa: EHT capabilities of station
* @eht_capa_len: the length of the EHT capabilities
* @s1g_capa: S1G capabilities of station
+ * @uhr_capa: UHR capabilities of the station
+ * @uhr_capa_len: the length of the UHR capabilities
*/
struct link_station_parameters {
const u8 *mld_mac;
@@ -1716,6 +1757,8 @@ struct link_station_parameters {
const struct ieee80211_eht_cap_elem *eht_capa;
u8 eht_capa_len;
const struct ieee80211_s1g_cap *s1g_capa;
+ const struct ieee80211_uhr_cap *uhr_capa;
+ u8 uhr_capa_len;
};
/**
@@ -1784,6 +1827,7 @@ struct cfg80211_ttlm_params {
* present/updated
* @eml_cap: EML capabilities of this station
* @link_sta_params: link related params.
+ * @epp_peer: EPP peer indication
*/
struct station_parameters {
struct net_device *vlan;
@@ -1810,6 +1854,7 @@ struct station_parameters {
bool eml_cap_present;
u16 eml_cap;
struct link_station_parameters link_sta_params;
+ bool epp_peer;
};
/**
@@ -1895,6 +1940,11 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
* @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS
* @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information
* @RATE_INFO_FLAGS_S1G_MCS: MCS field filled with S1G MCS
+ * @RATE_INFO_FLAGS_UHR_MCS: UHR MCS information
+ * @RATE_INFO_FLAGS_UHR_ELR_MCS: UHR ELR MCS was used
+ * (set together with @RATE_INFO_FLAGS_UHR_MCS)
+ * @RATE_INFO_FLAGS_UHR_IM: UHR Interference Mitigation
+ * was used
*/
enum rate_info_flags {
RATE_INFO_FLAGS_MCS = BIT(0),
@@ -1906,6 +1956,9 @@ enum rate_info_flags {
RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6),
RATE_INFO_FLAGS_EHT_MCS = BIT(7),
RATE_INFO_FLAGS_S1G_MCS = BIT(8),
+ RATE_INFO_FLAGS_UHR_MCS = BIT(9),
+ RATE_INFO_FLAGS_UHR_ELR_MCS = BIT(10),
+ RATE_INFO_FLAGS_UHR_IM = BIT(11),
};
/**
@@ -1921,7 +1974,7 @@ enum rate_info_flags {
* @RATE_INFO_BW_160: 160 MHz bandwidth
* @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation
* @RATE_INFO_BW_320: 320 MHz bandwidth
- * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation
+ * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT/UHR RU allocation
* @RATE_INFO_BW_1: 1 MHz bandwidth
* @RATE_INFO_BW_2: 2 MHz bandwidth
* @RATE_INFO_BW_4: 4 MHz bandwidth
@@ -1952,7 +2005,7 @@ enum rate_info_bw {
*
* @flags: bitflag of flags from &enum rate_info_flags
* @legacy: bitrate in 100kbit/s for 802.11abg
- * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G rate
+ * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G/UHR rate
* @nss: number of streams (VHT & HE only)
* @bw: bandwidth (from &enum rate_info_bw)
* @he_gi: HE guard interval (from &enum nl80211_he_gi)
@@ -3220,8 +3273,6 @@ struct cfg80211_auth_request {
* if this is %NULL for a link, that link is not requested
* @elems: extra elements for the per-STA profile for this link
* @elems_len: length of the elements
- * @disabled: If set this link should be included during association etc. but it
- * should not be used until enabled by the AP MLD.
* @error: per-link error code, must be <= 0. If there is an error, then the
* operation as a whole must fail.
*/
@@ -3229,7 +3280,6 @@ struct cfg80211_assoc_link {
struct cfg80211_bss *bss;
const u8 *elems;
size_t elems_len;
- bool disabled;
int error;
};
@@ -3262,6 +3312,7 @@ struct cfg80211_ml_reconf_req {
* Drivers shall disable MLO features for the current association if this
* flag is not set.
* @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any)
+ * @ASSOC_REQ_DISABLE_UHR: Disable UHR
*/
enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HT = BIT(0),
@@ -3272,6 +3323,7 @@ enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_EHT = BIT(5),
CONNECT_REQ_MLO_SUPPORT = BIT(6),
ASSOC_REQ_SPP_AMSDU = BIT(7),
+ ASSOC_REQ_DISABLE_UHR = BIT(8),
};
/**
@@ -4189,6 +4241,7 @@ struct cfg80211_ftm_responder_stats {
* @num_bursts_exp: actual number of bursts exponent negotiated
* @burst_duration: actual burst duration negotiated
* @ftms_per_burst: actual FTMs per burst negotiated
+ * @burst_period: actual burst period negotiated in units of 100ms
* @lci_len: length of LCI information (if present)
* @civicloc_len: length of civic location information (if present)
* @lci: LCI data (may be %NULL)
@@ -4230,6 +4283,7 @@ struct cfg80211_pmsr_ftm_result {
u8 num_bursts_exp;
u8 burst_duration;
u8 ftms_per_burst;
+ u16 burst_period;
s32 rssi_avg;
s32 rssi_spread;
struct rate_info tx_rate, rx_rate;
@@ -4292,7 +4346,9 @@ struct cfg80211_pmsr_result {
* @burst_period: burst period to use
* @asap: indicates to use ASAP mode
* @num_bursts_exp: number of bursts exponent
- * @burst_duration: burst duration
+ * @burst_duration: burst duration. If @trigger_based or @non_trigger_based is
+ * set, this is the burst duration in milliseconds, and zero means the
+ * device should pick an appropriate value based on @ftms_per_burst.
* @ftms_per_burst: number of FTMs per burst
* @ftmr_retries: number of retries for FTM request
* @request_lci: request LCI information
@@ -4305,6 +4361,8 @@ struct cfg80211_pmsr_result {
* EDCA based ranging will be used.
* @lmr_feedback: negotiate for I2R LMR feedback. Only valid if either
* @trigger_based or @non_trigger_based is set.
+ * @rsta: Operate as the RSTA in the measurement. Only valid if @lmr_feedback
+ * and either @trigger_based or @non_trigger_based is set.
* @bss_color: the bss color of the responder. Optional. Set to zero to
* indicate the driver should set the BSS color. Only valid if
* @non_trigger_based or @trigger_based is set.
@@ -4320,7 +4378,8 @@ struct cfg80211_pmsr_ftm_request_peer {
request_civicloc:1,
trigger_based:1,
non_trigger_based:1,
- lmr_feedback:1;
+ lmr_feedback:1,
+ rsta:1;
u8 num_bursts_exp;
u8 burst_duration;
u8 ftms_per_burst;
@@ -5640,6 +5699,18 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type);
* not limited)
* @ftm.trigger_based: trigger based ranging measurement is supported
* @ftm.non_trigger_based: non trigger based ranging measurement is supported
+ * @ftm.support_6ghz: supports ranging in 6 GHz band
+ * @ftm.max_tx_ltf_rep: maximum number of TX LTF repetitions supported (0 means
+ * only one LTF, no repetitions)
+ * @ftm.max_rx_ltf_rep: maximum number of RX LTF repetitions supported (0 means
+ * only one LTF, no repetitions)
+ * @ftm.max_tx_sts: maximum number of TX STS supported (zero based)
+ * @ftm.max_rx_sts: maximum number of RX STS supported (zero based)
+ * @ftm.max_total_ltf_tx: maximum total number of LTFs that can be transmitted
+ * (0 means unknown)
+ * @ftm.max_total_ltf_rx: maximum total number of LTFs that can be received
+ * (0 means unknown)
+ * @ftm.support_rsta: supports operating as RSTA in PMSR FTM request
*/
struct cfg80211_pmsr_capabilities {
unsigned int max_peers;
@@ -5657,7 +5728,15 @@ struct cfg80211_pmsr_capabilities {
request_lci:1,
request_civicloc:1,
trigger_based:1,
- non_trigger_based:1;
+ non_trigger_based:1,
+ support_6ghz:1;
+ u8 max_tx_ltf_rep;
+ u8 max_rx_ltf_rep;
+ u8 max_tx_sts;
+ u8 max_rx_sts;
+ u8 max_total_ltf_tx;
+ u8 max_total_ltf_rx;
+ u8 support_rsta:1;
} ftm;
};
@@ -5683,9 +5762,13 @@ struct wiphy_iftype_akm_suites {
*
* @rts_threshold: RTS threshold (dot11RTSThreshold);
* -1 (default) = RTS/CTS disabled
+ * @radio_debugfsdir: Pointer to debugfs directory containing the radio-
+ * specific parameters.
+ * NULL (default) = Debugfs directory not created
*/
struct wiphy_radio_cfg {
u32 rts_threshold;
+ struct dentry *radio_debugfsdir;
};
/**
@@ -6961,6 +7044,19 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
}
/**
+ * ieee80211_radio_freq_range_valid - Check if the radio supports the
+ * specified frequency range
+ *
+ * @radio: wiphy radio
+ * @freq: the frequency (in kHz) to be queried
+ * @width: the bandwidth (in kHz) to be queried
+ *
+ * Return: whether or not the given frequency range is valid for the given radio
+ */
+bool ieee80211_radio_freq_range_valid(const struct wiphy_radio *radio,
+ u32 freq, u32 width);
+
+/**
* cfg80211_radio_chandef_valid - Check if the radio supports the chandef
*
* @radio: wiphy radio
@@ -9770,6 +9866,21 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy,
const struct ieee80211_channel *chan);
+/**
+ * cfg80211_stop_link - stop a single AP/P2P_GO link, or all links on the
+ * interface
+ *
+ * @wiphy: the wiphy
+ * @wdev: wireless device
+ * @link_id: valid link ID in case of MLO AP/P2P_GO operation, or else -1
+ * @gfp: context flags
+ *
+ * If @link_id is non-negative during MLO operation, only the specified
+ * AP/P2P_GO link is stopped. If @link_id is -1, or when the last link is
+ * stopped, the entire interface is stopped as if the AP was stopped,
+ * IBSS/mesh left, or STA disconnected.
+ */
+void cfg80211_stop_link(struct wiphy *wiphy, struct wireless_dev *wdev,
+ int link_id, gfp_t gfp);
/**
* cfg80211_stop_iface - trigger interface disconnection
@@ -9783,8 +9894,11 @@ int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy,
*
* Note: This doesn't need any locks and is asynchronous.
*/
-void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
- gfp_t gfp);
+static inline void
+cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp)
+{
+ cfg80211_stop_link(wiphy, wdev, -1, gfp);
+}
/**
* cfg80211_shutdown_all_interfaces - shut down all interfaces for a wiphy
@@ -10118,6 +10232,36 @@ static inline int cfg80211_color_change_notify(struct net_device *dev,
}
/**
+ * cfg80211_6ghz_power_type - determine AP regulatory power type
+ * @control: control flags
+ * @client_flags: &enum ieee80211_channel_flags for station mode to enable
+ * SP to LPI fallback, zero otherwise.
+ *
+ * Return: regulatory power type from &enum ieee80211_ap_reg_power
+ */
+static inline enum ieee80211_ap_reg_power
+cfg80211_6ghz_power_type(u8 control, u32 client_flags)
+{
+ switch (u8_get_bits(control, IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP:
+ case IEEE80211_6GHZ_CTRL_REG_AP_ROLE_NOT_RELEVANT:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD:
+ return IEEE80211_REG_LPI_AP;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ return IEEE80211_REG_SP_AP;
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ return IEEE80211_REG_VLP_AP;
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
+ if (client_flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT)
+ return IEEE80211_REG_LPI_AP;
+ return IEEE80211_REG_SP_AP;
+ default:
+ return IEEE80211_REG_UNSET_AP;
+ }
+}
+
+/**
* cfg80211_links_removed - Notify about removed STA MLD setup links.
* @dev: network device.
* @link_mask: BIT mask of removed STA MLD setup link IDs.
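
cfg80211_6ghz_power_type() above classifies an AP from the Control field of the 6 GHz Operation Information. A short usage sketch (the caller shape is assumed):

	static enum ieee80211_ap_reg_power
	example_power_type(const struct ieee80211_he_6ghz_oper *oper,
			   const struct ieee80211_channel *chan)
	{
		/* Pass channel flags only in station mode to enable the
		 * SP -> LPI fallback; pass 0 otherwise.
		 */
		return cfg80211_6ghz_power_type(oper->control, chan->flags);
	}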
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 9e824f61e40f..cb839e0435a1 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -479,6 +479,10 @@ struct devlink_flash_notify {
* @set: set parameter value, used for runtime and permanent
* configuration modes
* @validate: validate input value is applicable (within value range, etc.)
+ * @get_default: get parameter default value, used for runtime and permanent
+ * configuration modes
+ * @reset_default: reset parameter to default value, used for runtime and permanent
+ * configuration modes
*
* This struct should be used by the driver to fill the data for
* a parameter it registers.
@@ -490,13 +494,20 @@ struct devlink_param {
enum devlink_param_type type;
unsigned long supported_cmodes;
int (*get)(struct devlink *devlink, u32 id,
- struct devlink_param_gset_ctx *ctx);
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack);
int (*set)(struct devlink *devlink, u32 id,
struct devlink_param_gset_ctx *ctx,
struct netlink_ext_ack *extack);
int (*validate)(struct devlink *devlink, u32 id,
union devlink_param_value val,
struct netlink_ext_ack *extack);
+ int (*get_default)(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack);
+ int (*reset_default)(struct devlink *devlink, u32 id,
+ enum devlink_param_cmode cmode,
+ struct netlink_ext_ack *extack);
};
struct devlink_param_item {
@@ -508,6 +519,7 @@ struct devlink_param_item {
* until reload.
*/
bool driverinit_value_new_valid;
+ union devlink_param_value driverinit_default;
};
enum devlink_param_generic_id {
@@ -532,6 +544,7 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_CLOCK_ID,
DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS,
DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MAC_PER_VF,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -602,6 +615,9 @@ enum devlink_param_generic_id {
#define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_NAME "num_doorbells"
#define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_TYPE DEVLINK_PARAM_TYPE_U32
+#define DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_NAME "max_mac_per_vf"
+#define DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_TYPE DEVLINK_PARAM_TYPE_U32
+
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
@@ -625,6 +641,37 @@ enum devlink_param_generic_id {
.validate = _validate, \
}
+#define DEVLINK_PARAM_GENERIC_WITH_DEFAULTS(_id, _cmodes, _get, _set, \
+ _validate, _get_default, \
+ _reset_default) \
+{ \
+ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \
+ .name = DEVLINK_PARAM_GENERIC_##_id##_NAME, \
+ .type = DEVLINK_PARAM_GENERIC_##_id##_TYPE, \
+ .generic = true, \
+ .supported_cmodes = _cmodes, \
+ .get = _get, \
+ .set = _set, \
+ .validate = _validate, \
+ .get_default = _get_default, \
+ .reset_default = _reset_default, \
+}
+
+#define DEVLINK_PARAM_DRIVER_WITH_DEFAULTS(_id, _name, _type, _cmodes, \
+ _get, _set, _validate, \
+ _get_default, _reset_default) \
+{ \
+ .id = _id, \
+ .name = _name, \
+ .type = _type, \
+ .supported_cmodes = _cmodes, \
+ .get = _get, \
+ .set = _set, \
+ .validate = _validate, \
+ .get_default = _get_default, \
+ .reset_default = _reset_default, \
+}
+
/* Identifier of board design */
#define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID "board.id"
/* Revision of board design */
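Sketch of a driver wiring the new default callbacks into its parameter table; the foo_* helpers and FOO_MAX_MAC_DEFAULT are hypothetical, but the signatures follow the prototypes above.

	static int foo_max_mac_get_default(struct devlink *devlink, u32 id,
					   struct devlink_param_gset_ctx *ctx,
					   struct netlink_ext_ack *extack)
	{
		ctx->val.vu32 = FOO_MAX_MAC_DEFAULT;
		return 0;
	}

	static int foo_max_mac_reset_default(struct devlink *devlink, u32 id,
					     enum devlink_param_cmode cmode,
					     struct netlink_ext_ack *extack)
	{
		return foo_apply_max_mac(devlink_priv(devlink),
					 FOO_MAX_MAC_DEFAULT);
	}

	static const struct devlink_param foo_params[] = {
		DEVLINK_PARAM_GENERIC_WITH_DEFAULTS(MAX_MAC_PER_VF,
						    BIT(DEVLINK_PARAM_CMODE_RUNTIME),
						    foo_max_mac_get,
						    foo_max_mac_set, NULL,
						    foo_max_mac_get_default,
						    foo_max_mac_reset_default),
	};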
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 58d91ccc56e0..a7b7abd66e21 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -67,6 +67,7 @@
FN(TC_EGRESS) \
FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
+ FN(QDISC_BURST_DROP) \
FN(QDISC_OVERLIMIT) \
FN(QDISC_CONGESTED) \
FN(CAKE_FLOOD) \
@@ -375,6 +376,11 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_QDISC_DROP,
/**
+ * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when the net.core.qdisc_max_burst
+ * limit is hit.
+ */
+ SKB_DROP_REASON_QDISC_BURST_DROP,
+ /**
* @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
* instance exceeds its total buffer size limit.
*/
diff --git a/include/net/dsa.h b/include/net/dsa.h
index d73ea0880066..6c17446f3dcc 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -55,6 +55,9 @@ struct tc_action;
#define DSA_TAG_PROTO_LAN937X_VALUE 27
#define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28
#define DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE 29
+#define DSA_TAG_PROTO_YT921X_VALUE 30
+#define DSA_TAG_PROTO_MXL_GSW1XX_VALUE 31
+#define DSA_TAG_PROTO_MXL862_VALUE 32
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
@@ -87,6 +90,9 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE,
DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE,
DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE,
+ DSA_TAG_PROTO_YT921X = DSA_TAG_PROTO_YT921X_VALUE,
+ DSA_TAG_PROTO_MXL_GSW1XX = DSA_TAG_PROTO_MXL_GSW1XX_VALUE,
+ DSA_TAG_PROTO_MXL862 = DSA_TAG_PROTO_MXL862_VALUE,
};
struct dsa_switch;
@@ -212,12 +218,6 @@ struct dsa_mall_mirror_tc_entry {
bool ingress;
};
-/* TC port policer entry */
-struct dsa_mall_policer_tc_entry {
- u32 burst;
- u64 rate_bytes_per_sec;
-};
-
/* TC matchall entry */
struct dsa_mall_tc_entry {
struct list_head list;
@@ -225,7 +225,7 @@ struct dsa_mall_tc_entry {
enum dsa_port_mall_action_type type;
union {
struct dsa_mall_mirror_tc_entry mirror;
- struct dsa_mall_policer_tc_entry policer;
+ struct flow_action_police policer;
};
};
@@ -298,6 +298,7 @@ struct dsa_port {
struct devlink_port devlink_port;
struct phylink *pl;
struct phylink_config pl_config;
+ netdevice_tracker conduit_tracker;
struct dsa_lag *lag;
struct net_device *hsr_dev;
@@ -1105,7 +1106,7 @@ struct dsa_switch_ops {
void (*port_mirror_del)(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror);
int (*port_policer_add)(struct dsa_switch *ds, int port,
- struct dsa_mall_policer_tc_entry *policer);
+ const struct flow_action_police *policer);
void (*port_policer_del)(struct dsa_switch *ds, int port);
int (*port_setup_tc)(struct dsa_switch *ds, int port,
enum tc_setup_type type, void *type_data);
@@ -1247,7 +1248,8 @@ struct dsa_switch_ops {
dsa_devlink_param_get, dsa_devlink_param_set, NULL)
int dsa_devlink_param_get(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx);
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack);
int dsa_devlink_param_set(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx,
struct netlink_ext_ack *extack);
@@ -1310,11 +1312,6 @@ static inline int dsa_devlink_port_to_port(struct devlink_port *port)
return port->index;
}
-struct dsa_switch_driver {
- struct list_head list;
- const struct dsa_switch_ops *ops;
-};
-
bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid,
struct dsa_db db);
@@ -1322,6 +1319,15 @@ bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb,
struct dsa_db db);
+int dsa_port_simple_hsr_validate(struct dsa_switch *ds, int port,
+ struct net_device *hsr,
+ struct netlink_ext_ack *extack);
+int dsa_port_simple_hsr_join(struct dsa_switch *ds, int port,
+ struct net_device *hsr,
+ struct netlink_ext_ack *extack);
+int dsa_port_simple_hsr_leave(struct dsa_switch *ds, int port,
+ struct net_device *hsr);
+
/* Keep inline for faster access in hot path */
static inline bool netdev_uses_dsa(const struct net_device *dev)
{
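Sketch of a switch driver's policer op after the conversion to the shared struct; foo_hw_set_policer() and the kbps conversion are illustrative only.

	static int foo_port_policer_add(struct dsa_switch *ds, int port,
					const struct flow_action_police *policer)
	{
		u64 rate_kbps = div_u64(policer->rate_bytes_ps * 8, 1000);

		/* struct flow_action_police still carries a byte burst. */
		return foo_hw_set_policer(ds->priv, port, rate_kbps,
					  policer->burst);
	}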
diff --git a/include/net/dst.h b/include/net/dst.h
index f8aa1239b4db..307073eae7f8 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -219,6 +219,12 @@ static inline u32 dst_mtu(const struct dst_entry *dst)
return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
}
+/* Variant of dst_mtu() for IPv4 users. */
+static inline u32 dst4_mtu(const struct dst_entry *dst)
+{
+ return INDIRECT_CALL_1(dst->ops->mtu, ipv4_mtu, dst);
+}
+
/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
{
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 596ab9791e4d..70a02ee14308 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -231,6 +231,21 @@ struct flow_action_cookie *flow_action_cookie_create(void *data,
gfp_t gfp);
void flow_action_cookie_destroy(struct flow_action_cookie *cookie);
+struct flow_action_police {
+ u32 burst;
+ u64 rate_bytes_ps;
+ u64 peakrate_bytes_ps;
+ u32 avrate;
+ u16 overhead;
+ u64 burst_pkt;
+ u64 rate_pkt_ps;
+ u32 mtu;
+ struct {
+ enum flow_action_id act_id;
+ u32 extval;
+ } exceed, notexceed;
+};
+
struct flow_action_entry {
enum flow_action_id id;
u32 hw_index;
@@ -275,20 +290,7 @@ struct flow_action_entry {
u32 trunc_size;
bool truncate;
} sample;
- struct { /* FLOW_ACTION_POLICE */
- u32 burst;
- u64 rate_bytes_ps;
- u64 peakrate_bytes_ps;
- u32 avrate;
- u16 overhead;
- u64 burst_pkt;
- u64 rate_pkt_ps;
- u32 mtu;
- struct {
- enum flow_action_id act_id;
- u32 extval;
- } exceed, notexceed;
- } police;
+ struct flow_action_police police; /* FLOW_ACTION_POLICE */
struct { /* FLOW_ACTION_CT */
int action;
u16 zone;
@@ -526,7 +528,7 @@ static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags,
*
* Return: true if control flags are set, false otherwise.
*/
-static inline bool flow_rule_match_has_control_flags(struct flow_rule *rule,
+static inline bool flow_rule_match_has_control_flags(const struct flow_rule *rule,
struct netlink_ext_ack *extack)
{
struct flow_match_control match;
@@ -718,7 +720,7 @@ struct flow_offload_action {
struct flow_offload_action *offload_action_alloc(unsigned int num_actions);
static inline struct flow_rule *
-flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
+flow_cls_offload_flow_rule(const struct flow_cls_offload *flow_cmd)
{
return flow_cmd->rule;
}
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index 9467e33dfb36..8aca881937da 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -358,7 +358,7 @@ static int fq_init(struct fq *fq, int flows_cnt)
fq->limit = 8192;
fq->memory_limit = 16 << 20; /* 16 MBytes */
- fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
+ fq->flows = kvzalloc_objs(fq->flows[0], fq->flows_cnt);
if (!fq->flows)
return -ENOMEM;
diff --git a/include/net/gro.h b/include/net/gro.h
index e3affb2e2ca8..2300b6da05b2 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -405,9 +405,8 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *);
+int udp6_gro_complete(struct sk_buff *, int);
#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \
({ \
@@ -593,4 +592,31 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);
+static inline struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
+{
+ unsigned int thlen, hlen, off;
+ struct tcphdr *th;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*th);
+ th = skb_gro_header(skb, hlen, off);
+ if (unlikely(!th))
+ return NULL;
+
+ thlen = th->doff * 4;
+ if (unlikely(thlen < sizeof(*th)))
+ return NULL;
+
+ hlen = off + thlen;
+ if (!skb_gro_may_pull(skb, hlen)) {
+ th = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!th))
+ return NULL;
+ }
+
+ skb_gro_pull(skb, thlen);
+
+ return th;
+}
+
#endif /* _NET_GRO_H */
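Illustrative sketch of a GRO receive hook using the new helper in place of the open-coded offset/doff validation it factors out; foo_gro_receive() is hypothetical.

	static struct sk_buff *foo_gro_receive(struct list_head *head,
					       struct sk_buff *skb)
	{
		struct tcphdr *th = tcp_gro_pull_header(skb);

		if (!th) {
			/* Malformed or not pullable: flush this skb. */
			NAPI_GRO_CB(skb)->flush = 1;
			return NULL;
		}

		/* ... look up and coalesce on th->source / th->dest ... */
		return NULL;
	}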
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 4acec191c54a..6632b1aa7584 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -42,6 +42,7 @@ struct net_hotdata {
int netdev_budget_usecs;
int tstamp_prequeue;
int max_backlog;
+ int qdisc_max_burst;
int dev_tx_weight;
int dev_rx_weight;
int sysctl_max_skb_frags;
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 813e163ce27c..c60867e7e43c 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2017 Intel Deutschland GmbH
- * Copyright (c) 2018-2019, 2021-2022 Intel Corporation
+ * Copyright (c) 2018-2019, 2021-2022, 2025 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -202,6 +202,24 @@ enum ieee80211_radiotap_vht_coding {
IEEE80211_RADIOTAP_CODING_LDPC_USER3 = 0x08,
};
+enum ieee80211_radiotap_vht_bandwidth {
+ /* Note: more values are defined but can't really be used */
+ IEEE80211_RADIOTAP_VHT_BW_20 = 0,
+ IEEE80211_RADIOTAP_VHT_BW_40 = 1,
+ IEEE80211_RADIOTAP_VHT_BW_80 = 4,
+ IEEE80211_RADIOTAP_VHT_BW_160 = 11,
+};
+
+struct ieee80211_radiotap_vht {
+ __le16 known;
+ u8 flags;
+ u8 bandwidth;
+ u8 mcs_nss[4];
+ u8 coding;
+ u8 group_id;
+ __le16 partial_aid;
+} __packed;
+
/* for IEEE80211_RADIOTAP_TIMESTAMP */
enum ieee80211_radiotap_timestamp_unit_spos {
IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK = 0x000F,
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 745891d2e113..ece8dabd209a 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -18,7 +18,9 @@ struct sk_buff;
struct sock;
struct sockaddr;
-struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *inet6_csk_route_req(const struct sock *sk,
+ struct dst_entry *dst,
+ struct flowi6 *fl6,
const struct request_sock *req, u8 proto);
int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 282e29237d93..c16de5b7963f 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk,
{
if (!net_eq(sock_net(sk), net) ||
sk->sk_family != AF_INET6 ||
- sk->sk_portpair != ports ||
+ READ_ONCE(sk->sk_portpair) != ports ||
!ipv6_addr_equal(&sk->sk_v6_daddr, saddr) ||
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return false;
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index c17a6585d0b0..5dd2bf24449e 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -19,15 +19,14 @@ struct msghdr;
struct net;
struct page;
struct sock;
-struct sockaddr;
struct socket;
int inet_release(struct socket *sock);
-int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+int inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
int addr_len, int flags);
-int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+int __inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
int addr_len, int flags, int is_sendmsg);
-int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
+int inet_dgram_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
int addr_len, int flags);
int inet_accept(struct socket *sock, struct socket *newsock,
struct proto_accept_arg *arg);
@@ -42,8 +41,8 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
int __inet_listen_sk(struct sock *sk, int backlog);
void inet_sock_destruct(struct sock *sk);
-int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+int inet_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len);
+int inet_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
/* Don't allocate port at this moment, defer to connect. */
#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
/* Grab and release socket lock. */
@@ -52,7 +51,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
#define BIND_FROM_BPF (1 << 2)
/* Skip CAP_NET_BIND_SERVICE check. */
#define BIND_NO_CAP_NET_BIND_SERVICE (1 << 3)
-int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+int __inet_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
u32 flags);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b4b886647607..5cb3056d6ddc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -42,7 +42,9 @@ struct inet_connection_sock_af_ops {
struct request_sock *req,
struct dst_entry *dst,
struct request_sock *req_unhash,
- bool *own_req);
+ bool *own_req,
+ void (*opt_child_init)(struct sock *newsk,
+ const struct sock *sk));
u16 net_header_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
@@ -56,7 +58,9 @@ struct inet_connection_sock_af_ops {
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_bind2_hash: Bind node in the bhash2 table
- * @icsk_retransmit_timer: Resend (no ack)
+ * @icsk_delack_timer: Delayed ACK timer
+ * @icsk_keepalive_timer: Keepalive timer
+ * @mptcp_tout_timer: MPTCP timeout timer
* @icsk_rto: Retransmit timeout
* @icsk_pmtu_cookie Last pmtu seen by socket
* @icsk_ca_ops Pluggable congestion control hook
@@ -81,8 +85,11 @@ struct inet_connection_sock {
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
struct inet_bind2_bucket *icsk_bind2_hash;
- struct timer_list icsk_retransmit_timer;
- struct timer_list icsk_delack_timer;
+ struct timer_list icsk_delack_timer;
+ union {
+ struct timer_list icsk_keepalive_timer;
+ struct timer_list mptcp_tout_timer;
+ };
__u32 icsk_rto;
__u32 icsk_rto_min;
u32 icsk_rto_max;
@@ -184,10 +191,9 @@ static inline void inet_csk_delack_init(struct sock *sk)
memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
}
-static inline unsigned long
-icsk_timeout(const struct inet_connection_sock *icsk)
+static inline unsigned long tcp_timeout_expires(const struct sock *sk)
{
- return READ_ONCE(icsk->icsk_retransmit_timer.expires);
+ return READ_ONCE(sk->tcp_retransmit_timer.expires);
}
static inline unsigned long
@@ -203,7 +209,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
smp_store_release(&icsk->icsk_pending, 0);
#ifdef INET_CSK_CLEAR_TIMERS
- sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer(sk, &sk->tcp_retransmit_timer);
#endif
} else if (what == ICSK_TIME_DACK) {
smp_store_release(&icsk->icsk_ack.pending, 0);
@@ -235,7 +241,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
smp_store_release(&icsk->icsk_pending, what);
- sk_reset_timer(sk, &icsk->icsk_retransmit_timer, when);
+ sk_reset_timer(sk, &sk->tcp_retransmit_timer, when);
} else if (what == ICSK_TIME_DACK) {
smp_store_release(&icsk->icsk_ack.pending,
icsk->icsk_ack.pending | ICSK_ACK_TIMER);
@@ -267,8 +273,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
struct request_sock *req,
struct sock *child);
-bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
- unsigned long timeout);
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req);
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
struct request_sock *req,
bool own_req);
@@ -291,14 +296,6 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
-static inline unsigned long
-reqsk_timeout(struct request_sock *req, unsigned long max_timeout)
-{
- u64 timeout = (u64)req->timeout << req->num_timeout;
-
- return (unsigned long)min_t(u64, timeout, max_timeout);
-}
-
void inet_csk_destroy_sock(struct sock *sk);
void inet_csk_prepare_for_destroy_sock(struct sock *sk);
void inet_csk_prepare_forced_close(struct sock *sk);
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index ea32393464a2..827b87a95dab 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
return outer;
}
+/* Apply either ECT(0) or ECT(1) */
+static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
+{
+ __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
+
+ /* Mask the complete byte in case the connection alternates between
+ * ECT(0) and ECT(1).
+ */
+ inet_sk(sk)->tos &= ~INET_ECN_MASK;
+ inet_sk(sk)->tos |= ect;
+ if (inet6_sk(sk)) {
+ inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
+ inet6_sk(sk)->tclass |= ect;
+ }
+}
+
static inline void INET_ECN_xmit(struct sock *sk)
{
- inet_sk(sk)->tos |= INET_ECN_ECT_0;
- if (inet6_sk(sk) != NULL)
- inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+ __INET_ECN_xmit(sk, false);
}
static inline void INET_ECN_dontxmit(struct sock *sk)
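Sketch, assuming a congestion control module that opts into ECT(1) at init: because the helper masks the whole ECN field first, a later call with use_ect_1 == false cannot leave stale bits behind. foo_ca_init() is hypothetical.

	static void foo_ca_init(struct sock *sk)
	{
		__INET_ECN_xmit(sk, true);	/* mark outgoing packets ECT(1) */
	}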
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 0eccd9c3a883..365925c9d262 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -123,27 +123,15 @@ void inet_frags_fini(struct inet_frags *);
int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
-static inline void fqdir_pre_exit(struct fqdir *fqdir)
-{
- /* Prevent creation of new frags.
- * Pairs with READ_ONCE() in inet_frag_find().
- */
- WRITE_ONCE(fqdir->high_thresh, 0);
-
- /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
- * and ip6frag_expire_frag_queue().
- */
- WRITE_ONCE(fqdir->dead, true);
-}
+void fqdir_pre_exit(struct fqdir *fqdir);
void fqdir_exit(struct fqdir *fqdir);
void inet_frag_kill(struct inet_frag_queue *q, int *refs);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
-/* Free all skbs in the queue; return the sum of their truesizes. */
-unsigned int inet_frag_rbtree_purge(struct rb_root *root,
- enum skb_drop_reason reason);
+void inet_frag_queue_flush(struct inet_frag_queue *q,
+ enum skb_drop_reason reason);
static inline void inet_frag_putn(struct inet_frag_queue *q, int refs)
{
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ac05a52d9e13..5a979dcab538 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -345,7 +345,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk,
int dif, int sdif)
{
if (!net_eq(sock_net(sk), net) ||
- sk->sk_portpair != ports ||
+ READ_ONCE(sk->sk_portpair) != ports ||
sk->sk_addrpair != cookie)
return false;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1086256549fa..7cdcbed3e5cb 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -26,6 +26,8 @@
#include <net/tcp_states.h>
#include <net/l3mdev.h>
+#define IP_OPTIONS_DATA_FIXED_SIZE 40
+
/** struct ip_options - IP Options
*
* @faddr - Saved first hop address
@@ -58,12 +60,9 @@ struct ip_options {
struct ip_options_rcu {
struct rcu_head rcu;
- struct ip_options opt;
-};
-struct ip_options_data {
- struct ip_options_rcu opt;
- char data[40];
+ /* Must be last as it ends in a flexible-array member. */
+ struct ip_options opt;
};
struct inet_request_sock {
@@ -101,10 +100,7 @@ struct inet_request_sock {
};
};
-static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
-{
- return (struct inet_request_sock *)sk;
-}
+#define inet_rsk(ptr) container_of_const(ptr, struct inet_request_sock, req)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
@@ -163,6 +159,13 @@ static inline bool inet_sk_bound_dev_eq(const struct net *net,
#endif
}
+struct inet6_cork {
+ struct ipv6_txoptions *opt;
+ u8 hop_limit;
+ u8 tclass;
+ u8 dontfrag:1;
+};
+
struct inet_cork {
unsigned int flags;
__be32 addr;
@@ -183,6 +186,9 @@ struct inet_cork {
struct inet_cork_full {
struct inet_cork base;
struct flowi fl;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_cork base6;
+#endif
};
struct ip_mc_socklist;
@@ -214,6 +220,7 @@ struct inet_sock {
struct sock sk;
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6_pinfo *pinet6;
+ struct ipv6_fl_socklist __rcu *ipv6_fl_list;
#endif
/* Socket demultiplex comparisons on incoming packets. */
#define inet_daddr sk.__sk_common.skc_daddr
@@ -354,14 +361,6 @@ static inline struct sock *skb_to_full_sk(const struct sk_buff *skb)
#define inet_sk(ptr) container_of_const(ptr, struct inet_sock, sk)
-static inline void __inet_sk_copy_descendant(struct sock *sk_to,
- const struct sock *sk_from,
- const int ancestor_size)
-{
- memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
- sk_from->sk_prot->obj_size - ancestor_size);
-}
-
int inet_sk_rebuild_header(struct sock *sk);
/**
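A minimal sketch of the allocation arithmetic the new define preserves, assuming a caller that previously embedded struct ip_options_data; 40 bytes is the maximum IPv4 options length.

	struct ip_options_rcu *opt;

	opt = kzalloc(sizeof(*opt) + IP_OPTIONS_DATA_FIXED_SIZE, GFP_KERNEL);
	if (!opt)
		return -ENOMEM;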
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
index 2cbbee6e806a..a75912fe247e 100644
--- a/include/net/ioam6.h
+++ b/include/net/ioam6.h
@@ -60,6 +60,8 @@ void ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_trace_hdr *trace,
bool is_input);
+u8 ioam6_trace_compute_nodelen(u32 trace_type);
+
int ioam6_init(void);
void ioam6_exit(void);
diff --git a/include/net/ip.h b/include/net/ip.h
index 380afb691c41..7f9abd457e01 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
- ipcm->protocol = inet->inet_num;
+ ipcm->protocol = READ_ONCE(inet->inet_num);
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
@@ -261,8 +261,8 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
}
/* datagram.c */
-int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
-int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+int __ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
+int ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
void ip4_datagram_release_cb(struct sock *sk);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 7c5512baa4b2..a55f9bf95fe3 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -266,6 +266,12 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
+/* Variant of dst_mtu() for IPv6 users */
+static inline u32 dst6_mtu(const struct dst_entry *dst)
+{
+ return INDIRECT_CALL_1(dst->ops->mtu, ip6_mtu, dst);
+}
+
static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
{
const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 120db2865811..359b595f1df9 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -156,6 +156,18 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
{
int pkt_len, err;
+ if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) {
+ if (dev) {
+ net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
+ dev->name);
+ DEV_STATS_INC(dev, tx_errors);
+ }
+ kfree_skb(skb);
+ return;
+ }
+
+ dev_xmit_recursion_inc();
+
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
IP6CB(skb)->flags = ip6cb_flags;
pkt_len = skb->len - skb_inner_network_offset(skb);
@@ -166,6 +178,8 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
pkt_len = -1;
iptunnel_xmit_stats(dev, pkt_len);
}
+
+ dev_xmit_recursion_dec();
}
#endif
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index b4495c38e0a0..318593743b6e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net,
siphash_aligned_key_t hash_key;
u32 mp_seed;
- mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
+ mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed);
fib_multipath_hash_construct_key(&hash_key, mp_seed);
return flow_hash_from_keys_seed(keys, &hash_key);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index ecae35512b9b..1f577a4f8ce9 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -19,6 +19,7 @@
#include <net/rtnetlink.h>
#include <net/lwtunnel.h>
#include <net/dst_cache.h>
+#include <net/netdev_lock.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -26,6 +27,13 @@
#include <net/ip6_route.h>
#endif
+/* Recursion limit for tunnel xmit to detect routing loops.
+ * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel
+ * recursion involves route lookups and full IP output, consuming much
+ * more stack per level, so a lower limit is needed.
+ */
+#define IP_TUNNEL_RECURSION_LIMIT 4
+
/* Keep error state on tunnel for 30 sec */
#define IPTUNNEL_ERR_TIMEO (30*HZ)
@@ -372,7 +380,17 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
fl4->flowi4_flags = flow_flags;
}
-int ip_tunnel_init(struct net_device *dev);
+int __ip_tunnel_init(struct net_device *dev);
+#define ip_tunnel_init(DEV) \
+({ \
+ struct net_device *__dev = (DEV); \
+ int __res = __ip_tunnel_init(__dev); \
+ \
+ if (!__res) \
+ netdev_lockdep_set_classes(__dev);\
+ __res; \
+})
+
void ip_tunnel_uninit(struct net_device *dev);
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
struct net *ip_tunnel_get_link_net(const struct net_device *dev);
@@ -647,13 +665,29 @@ static inline int iptunnel_pull_offloads(struct sk_buff *skb)
static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
{
if (pkt_len > 0) {
- struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
-
- u64_stats_update_begin(&tstats->syncp);
- u64_stats_add(&tstats->tx_bytes, pkt_len);
- u64_stats_inc(&tstats->tx_packets);
- u64_stats_update_end(&tstats->syncp);
- put_cpu_ptr(tstats);
+ if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) {
+ struct pcpu_dstats *dstats = get_cpu_ptr(dev->dstats);
+
+ u64_stats_update_begin(&dstats->syncp);
+ u64_stats_add(&dstats->tx_bytes, pkt_len);
+ u64_stats_inc(&dstats->tx_packets);
+ u64_stats_update_end(&dstats->syncp);
+ put_cpu_ptr(dstats);
+ return;
+ }
+ if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) {
+ struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ u64_stats_add(&tstats->tx_bytes, pkt_len);
+ u64_stats_inc(&tstats->tx_packets);
+ u64_stats_update_end(&tstats->syncp);
+ put_cpu_ptr(tstats);
+ return;
+ }
+ pr_err_once("iptunnel_xmit_stats pcpu_stat_type=%d\n",
+ dev->pcpu_stat_type);
+ WARN_ON_ONCE(1);
return;
}
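Pattern sketch for other virtual devices wanting the same loop guard; foo_tunnel_xmit() is hypothetical, the helpers are the ones ip6tunnel_xmit() uses above.

	static netdev_tx_t foo_tunnel_xmit(struct sk_buff *skb,
					   struct net_device *dev)
	{
		if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) {
			DEV_STATS_INC(dev, tx_errors);
			kfree_skb(skb);
			return NETDEV_TX_OK;
		}

		dev_xmit_recursion_inc();
		/* ... build headers, route, hand off to the IP layer ... */
		dev_xmit_recursion_dec();
		return NETDEV_TX_OK;
	}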
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2ccdf85f34f1..53c5056508be 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -25,8 +25,6 @@ struct ip_tunnel_info;
#define SIN6_LEN_RFC2133 24
-#define IPV6_MAXPLEN 65535
-
/*
* NextHeader field of IPv6 header
*/
@@ -151,17 +149,6 @@ struct frag_hdr {
__be32 identification;
};
-/*
- * Jumbo payload option, as described in RFC 2675 2.
- */
-struct hop_jumbo_hdr {
- u8 nexthdr;
- u8 hdrlen;
- u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */
- u8 tlv_len; /* 4 */
- __be32 jumbo_payload_len;
-};
-
#define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8
@@ -464,72 +451,6 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
struct ipv6_txoptions *opt);
-/* This helper is specialized for BIG TCP needs.
- * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
- * It assumes headers are already in skb->head.
- * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there.
- */
-static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
-{
- const struct hop_jumbo_hdr *jhdr;
- const struct ipv6hdr *nhdr;
-
- if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
- return 0;
-
- if (skb->protocol != htons(ETH_P_IPV6))
- return 0;
-
- if (skb_network_offset(skb) +
- sizeof(struct ipv6hdr) +
- sizeof(struct hop_jumbo_hdr) > skb_headlen(skb))
- return 0;
-
- nhdr = ipv6_hdr(skb);
-
- if (nhdr->nexthdr != NEXTHDR_HOP)
- return 0;
-
- jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1);
- if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
- jhdr->nexthdr != IPPROTO_TCP)
- return 0;
- return jhdr->nexthdr;
-}
-
-/* Return 0 if HBH header is successfully removed
- * Or if HBH removal is unnecessary (packet is not big TCP)
- * Return error to indicate dropping the packet
- */
-static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb)
-{
- const int hophdr_len = sizeof(struct hop_jumbo_hdr);
- int nexthdr = ipv6_has_hopopt_jumbo(skb);
- struct ipv6hdr *h6;
-
- if (!nexthdr)
- return 0;
-
- if (skb_cow_head(skb, 0))
- return -1;
-
- /* Remove the HBH header.
- * Layout: [Ethernet header][IPv6 header][HBH][L4 Header]
- */
- memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb),
- skb_network_header(skb) - skb_mac_header(skb) +
- sizeof(struct ipv6hdr));
-
- __skb_pull(skb, hophdr_len);
- skb->network_header += hophdr_len;
- skb->mac_header += hophdr_len;
-
- h6 = ipv6_hdr(skb);
- h6->nexthdr = nexthdr;
-
- return 0;
-}
-
static inline bool ipv6_accept_ra(const struct inet6_dev *idev)
{
s32 accept_ra = READ_ONCE(idev->cnf.accept_ra);
@@ -931,10 +852,10 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
#if IS_ENABLED(CONFIG_IPV6)
-static inline bool ipv6_can_nonlocal_bind(struct net *net,
- struct inet_sock *inet)
+static inline bool ipv6_can_nonlocal_bind(const struct net *net,
+ const struct inet_sock *inet)
{
- return net->ipv6.sysctl.ip_nonlocal_bind ||
+ return READ_ONCE(net->ipv6.sysctl.ip_nonlocal_bind) ||
test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) ||
test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags);
}
@@ -949,10 +870,12 @@ static inline bool ipv6_can_nonlocal_bind(struct net *net,
#define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OPTOUT
-static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
+static inline __be32 ip6_make_flowlabel(const struct net *net,
+ struct sk_buff *skb,
__be32 flowlabel, bool autolabel,
struct flowi6 *fl6)
{
+ u8 auto_flowlabels;
u32 hash;
/* @flowlabel may include more than a flow label, eg, the traffic class.
@@ -960,10 +883,12 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
*/
flowlabel &= IPV6_FLOWLABEL_MASK;
- if (flowlabel ||
- net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF ||
- (!autolabel &&
- net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED))
+ if (flowlabel)
+ return flowlabel;
+
+ auto_flowlabels = READ_ONCE(net->ipv6.sysctl.auto_flowlabels);
+ if (auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF ||
+ (!autolabel && auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED))
return flowlabel;
hash = skb_get_hash_flowi6(skb, fl6);
@@ -976,15 +901,15 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
- if (net->ipv6.sysctl.flowlabel_state_ranges)
+ if (READ_ONCE(net->ipv6.sysctl.flowlabel_state_ranges))
flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG;
return flowlabel;
}
-static inline int ip6_default_np_autolabel(struct net *net)
+static inline int ip6_default_np_autolabel(const struct net *net)
{
- switch (net->ipv6.sysctl.auto_flowlabels) {
+ switch (READ_ONCE(net->ipv6.sysctl.auto_flowlabels)) {
case IP6_AUTO_FLOW_LABEL_OFF:
case IP6_AUTO_FLOW_LABEL_OPTIN:
default:
@@ -995,13 +920,13 @@ static inline int ip6_default_np_autolabel(struct net *net)
}
}
#else
-static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
+static inline __be32 ip6_make_flowlabel(const struct net *net, struct sk_buff *skb,
__be32 flowlabel, bool autolabel,
struct flowi6 *fl6)
{
return flowlabel;
}
-static inline int ip6_default_np_autolabel(struct net *net)
+static inline int ip6_default_np_autolabel(const struct net *net)
{
return 0;
}
@@ -1010,11 +935,11 @@ static inline int ip6_default_np_autolabel(struct net *net)
#if IS_ENABLED(CONFIG_IPV6)
static inline int ip6_multipath_hash_policy(const struct net *net)
{
- return net->ipv6.sysctl.multipath_hash_policy;
+ return READ_ONCE(net->ipv6.sysctl.multipath_hash_policy);
}
static inline u32 ip6_multipath_hash_fields(const struct net *net)
{
- return net->ipv6.sysctl.multipath_hash_fields;
+ return READ_ONCE(net->ipv6.sysctl.multipath_hash_fields);
}
#else
static inline int ip6_multipath_hash_policy(const struct net *net)
@@ -1103,8 +1028,7 @@ void ip6_flush_pending_frames(struct sock *sk);
int ip6_send_skb(struct sk_buff *skb);
struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue,
- struct inet_cork_full *cork,
- struct inet6_cork *v6_cork);
+ struct inet_cork_full *cork);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
@@ -1115,8 +1039,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
{
- return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork,
- &inet6_sk(sk)->cork);
+ return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork);
}
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
@@ -1147,11 +1070,11 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
* Extension header (options) processing
*/
-void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
- u8 *proto, struct in6_addr **daddr_p,
- struct in6_addr *saddr);
-void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
- u8 *proto);
+u8 ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+ u8 proto, struct in6_addr **daddr_p,
+ struct in6_addr *saddr);
+u8 ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+ u8 proto);
int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp,
__be16 *frag_offp);
@@ -1170,9 +1093,19 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target,
int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type);
-struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
- const struct ipv6_txoptions *opt,
- struct in6_addr *orig);
+struct in6_addr *__fl6_update_dst(struct flowi6 *fl6,
+ const struct ipv6_txoptions *opt,
+ struct in6_addr *orig);
+
+static inline struct in6_addr *
+fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt,
+ struct in6_addr *orig)
+{
+ if (likely(!opt))
+ return NULL;
+
+ return __fl6_update_dst(fl6, opt, orig);
+}
/*
* socket options (ipv6_sockglue.c)
@@ -1188,10 +1121,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
int ipv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr,
+int __ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *addr,
int addr_len);
-int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len);
-int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr,
+int ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *addr, int addr_len);
+int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr_unsized *addr,
int addr_len);
int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr);
void ip6_datagram_release_cb(struct sock *sk);
@@ -1208,8 +1141,8 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
void inet6_cleanup_sock(struct sock *sk);
void inet6_sock_destruct(struct sock *sk);
int inet6_release(struct socket *sock);
-int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len);
+int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
@@ -1280,12 +1213,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
static inline int ip6_sock_set_v6only(struct sock *sk)
{
- if (inet_sk(sk)->inet_num)
- return -EINVAL;
+ int ret = 0;
+
lock_sock(sk);
- sk->sk_ipv6only = true;
+ if (inet_sk(sk)->inet_num)
+ ret = -EINVAL;
+ else
+ sk->sk_ipv6only = true;
release_sock(sk);
- return 0;
+ return ret;
}
static inline void ip6_sock_set_recverr(struct sock *sk)
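Callers keep the familiar pattern; the common !opt case now returns NULL inline without the out-of-line call. Sketch, assuming the usual locals under rcu_read_lock():

	struct in6_addr *final_p, final;
	struct dst_entry *dst;

	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);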
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 38ef66826939..41d9fc6965f9 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -69,9 +69,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
int refs = 1;
rcu_read_lock();
- /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
- if (READ_ONCE(fq->q.fqdir->dead))
- goto out_rcu_unlock;
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
@@ -80,6 +77,12 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
fq->q.flags |= INET_FRAG_DROP;
inet_frag_kill(&fq->q, &refs);
+ /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(fq->q.fqdir->dead)) {
+ inet_frag_queue_flush(&fq->q, 0);
+ goto out;
+ }
+
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
goto out;
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 8a3465c8c2c5..d3013e721b14 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -80,7 +80,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly;
/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
struct ipv6_bpf_stub {
- int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len,
u32 flags);
struct sock *(*udp6_lib_lookup)(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
index 9804fa5d9c67..18f511da9a09 100644
--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -195,35 +195,15 @@ struct iucv_handler {
struct list_head paths;
};
-/**
- * iucv_register:
- * @handler: address of iucv handler structure
- * @smp: != 0 indicates that the handler can deal with out of order messages
- *
- * Registers a driver with IUCV.
- *
- * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid
- * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus.
- */
int iucv_register(struct iucv_handler *handler, int smp);
+void iucv_unregister(struct iucv_handler *handler, int smp);
/**
- * iucv_unregister
- * @handler: address of iucv handler structure
- * @smp: != 0 indicates that the handler can deal with out of order messages
- *
- * Unregister driver from IUCV.
- */
-void iucv_unregister(struct iucv_handler *handle, int smp);
-
-/**
- * iucv_path_alloc
+ * iucv_path_alloc - Allocate a new path structure for use with iucv_path_connect.
* @msglim: initial message limit
* @flags: initial flags
* @gfp: kmalloc allocation flag
*
- * Allocate a new path structure for use with iucv_connect.
- *
* Returns: NULL if the memory allocation failed or a pointer to the
* path structure.
*/
@@ -231,7 +211,7 @@ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp)
{
struct iucv_path *path;
- path = kzalloc(sizeof(struct iucv_path), gfp);
+ path = kzalloc_obj(struct iucv_path, gfp);
if (path) {
path->msglim = msglim;
path->flags = flags;
@@ -240,229 +220,48 @@ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp)
}
/**
- * iucv_path_free
+ * iucv_path_free - Frees a path structure.
* @path: address of iucv path structure
- *
- * Frees a path structure.
*/
static inline void iucv_path_free(struct iucv_path *path)
{
kfree(path);
}
-/**
- * iucv_path_accept
- * @path: address of iucv path structure
- * @handler: address of iucv handler structure
- * @userdata: 16 bytes of data reflected to the communication partner
- * @private: private data passed to interrupt handlers for this path
- *
- * This function is issued after the user received a connection pending
- * external interrupt and now wishes to complete the IUCV communication path.
- *
- * Returns: the result of the CP IUCV call.
- */
int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
u8 *userdata, void *private);
-/**
- * iucv_path_connect
- * @path: address of iucv path structure
- * @handler: address of iucv handler structure
- * @userid: 8-byte user identification
- * @system: 8-byte target system identification
- * @userdata: 16 bytes of data reflected to the communication partner
- * @private: private data passed to interrupt handlers for this path
- *
- * This function establishes an IUCV path. Although the connect may complete
- * successfully, you are not able to use the path until you receive an IUCV
- * Connection Complete external interrupt.
- *
- * Returns: the result of the CP IUCV call.
- */
int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
u8 *userid, u8 *system, u8 *userdata,
void *private);
-/**
- * iucv_path_quiesce:
- * @path: address of iucv path structure
- * @userdata: 16 bytes of data reflected to the communication partner
- *
- * This function temporarily suspends incoming messages on an IUCV path.
- * You can later reactivate the path by invoking the iucv_resume function.
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_path_quiesce(struct iucv_path *path, u8 *userdata);
-/**
- * iucv_path_resume:
- * @path: address of iucv path structure
- * @userdata: 16 bytes of data reflected to the communication partner
- *
- * This function resumes incoming messages on an IUCV path that has
- * been stopped with iucv_path_quiesce.
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_path_resume(struct iucv_path *path, u8 *userdata);
-/**
- * iucv_path_sever
- * @path: address of iucv path structure
- * @userdata: 16 bytes of data reflected to the communication partner
- *
- * This function terminates an IUCV path.
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_path_sever(struct iucv_path *path, u8 *userdata);
-/**
- * iucv_message_purge
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- * @srccls: source class of message
- *
- * Cancels a message you have sent.
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
u32 srccls);
-/**
- * iucv_message_receive
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- * @flags: flags that affect how the message is received (IUCV_IPBUFLST)
- * @buffer: address of data buffer or address of struct iucv_array
- * @size: length of data buffer
- * @residual:
- *
- * This function receives messages that are being sent to you over
- * established paths. This function will deal with RMDATA messages
- * embedded in struct iucv_message as well.
- *
- * Locking: local_bh_enable/local_bh_disable
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size, size_t *residual);
-/**
- * __iucv_message_receive
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- * @flags: flags that affect how the message is received (IUCV_IPBUFLST)
- * @buffer: address of data buffer or address of struct iucv_array
- * @size: length of data buffer
- * @residual:
- *
- * This function receives messages that are being sent to you over
- * established paths. This function will deal with RMDATA messages
- * embedded in struct iucv_message as well.
- *
- * Locking: no locking.
- *
- * Returns: the result from the CP IUCV call.
- */
int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size,
size_t *residual);
-/**
- * iucv_message_reject
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- *
- * The reject function refuses a specified message. Between the time you
- * are notified of a message and the time that you complete the message,
- * the message may be rejected.
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
-/**
- * iucv_message_reply
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- * @flags: how the reply is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
- * @reply: address of data buffer or address of struct iucv_array
- * @size: length of reply data buffer
- *
- * This function responds to the two-way messages that you receive. You
- * must identify completely the message to which you wish to reply. ie,
- * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into
- * the parameter list.
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *reply, size_t size);
-/**
- * iucv_message_send
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
- * @srccls: source class of message
- * @buffer: address of data buffer or address of struct iucv_array
- * @size: length of send buffer
- *
- * This function transmits data to another application. Data to be
- * transmitted is in a buffer and this is a one-way message and the
- * receiver will not reply to the message.
- *
- * Locking: local_bh_enable/local_bh_disable
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
-/**
- * __iucv_message_send
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
- * @srccls: source class of message
- * @buffer: address of data buffer or address of struct iucv_array
- * @size: length of send buffer
- *
- * This function transmits data to another application. Data to be
- * transmitted is in a buffer and this is a one-way message and the
- * receiver will not reply to the message.
- *
- * Locking: no locking.
- *
- * Returns: the result from the CP IUCV call.
- */
int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
-/**
- * iucv_message_send2way
- * @path: address of iucv path structure
- * @msg: address of iucv msg structure
- * @flags: how the message is sent and the reply is received
- * (IUCV_IPRMDATA, IUCV_IPBUFLST, IUCV_IPPRTY, IUCV_ANSLST)
- * @srccls: source class of message
- * @buffer: address of data buffer or address of struct iucv_array
- * @size: length of send buffer
- * @ansbuf: address of answer buffer or address of struct iucv_array
- * @asize: size of reply buffer
- *
- * This function transmits data to another application. Data to be
- * transmitted is in a buffer. The receiver of the send is expected to
- * reply to the message and a buffer is provided into which IUCV moves
- * the reply to this message.
- *
- * Returns: the result from the CP IUCV call.
- */
int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size,
void *answer, size_t asize, size_t *residual);
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 1eb8dad18f7e..710e98665eb3 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -207,18 +207,19 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
static inline
struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev;
+ rcu_read_lock();
+ dev = skb_dst_dev_rcu(skb);
if (netif_is_l3_slave(dev)) {
struct net_device *master;
- rcu_read_lock();
master = netdev_master_upper_dev_get_rcu(dev);
if (master && master->l3mdev_ops->l3mdev_l3_out)
skb = master->l3mdev_ops->l3mdev_l3_out(master, sk,
skb, proto);
- rcu_read_unlock();
}
+ rcu_read_unlock();
return skb;
}
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
index 481a7b28e6f2..82b5d21aae87 100644
--- a/include/net/libeth/xsk.h
+++ b/include/net/libeth/xsk.h
@@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
* @pending: current number of XSkFQEs to refill
* @thresh: threshold below which the queue is refilled
* @buf_len: HW-writeable length per each buffer
+ * @truesize: step between consecutive buffers, 0 if none exists
* @nid: ID of the closest NUMA node with memory
*/
struct libeth_xskfq {
@@ -614,6 +615,8 @@ struct libeth_xskfq {
u32 thresh;
u32 buf_len;
+ u32 truesize;
+
int nid;
};
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a55085cf4ec4..adce2144a678 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2025 Intel Corporation
+ * Copyright (C) 2018 - 2026 Intel Corporation
*/
#ifndef MAC80211_H
@@ -706,6 +706,7 @@ struct ieee80211_parsed_tpe {
* @pwr_reduction: power constraint of BSS.
* @eht_support: does this BSS support EHT
* @epcs_support: does this BSS support EPCS
+ * @uhr_support: does this BSS support UHR
* @csa_active: marks whether a channel switch is going on.
* @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @chanctx_conf: The channel context this interface is assigned to, or %NULL
@@ -832,6 +833,8 @@ struct ieee80211_bss_conf {
u8 pwr_reduction;
bool eht_support;
bool epcs_support;
+ bool uhr_support;
+
bool csa_active;
bool mu_mimo_owner;
@@ -1529,6 +1532,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* known the frame shouldn't be reported.
* @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by
* hardware or driver)
+ * @RX_FLAG_RADIOTAP_VHT: VHT radiotap data is present
*/
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
@@ -1564,6 +1568,7 @@ enum mac80211_rx_flags {
RX_FLAG_RADIOTAP_LSIG = BIT(28),
RX_FLAG_NO_PSDU = BIT(29),
RX_FLAG_8023 = BIT(30),
+ RX_FLAG_RADIOTAP_VHT = BIT(31),
};
/**
@@ -1596,6 +1601,7 @@ enum mac80211_rx_encoding {
RX_ENC_VHT,
RX_ENC_HE,
RX_ENC_EHT,
+ RX_ENC_UHR,
};
/**
@@ -1629,7 +1635,7 @@ enum mac80211_rx_encoding {
* @antenna: antenna used
* @rate_idx: index of data rate into band's supported rates or MCS index if
* HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT)
- * @nss: number of streams (VHT, HE and EHT only)
+ * @nss: number of streams (VHT, HE, EHT and UHR only)
* @flag: %RX_FLAG_\*
* @encoding: &enum mac80211_rx_encoding
* @bw: &enum rate_info_bw
@@ -1640,6 +1646,11 @@ enum mac80211_rx_encoding {
* @eht: EHT specific rate information
* @eht.ru: EHT RU, from &enum nl80211_eht_ru_alloc
* @eht.gi: EHT GI, from &enum nl80211_eht_gi
+ * @uhr: UHR specific rate information
+ * @uhr.ru: UHR RU, from &enum nl80211_eht_ru_alloc
+ * @uhr.gi: UHR GI, from &enum nl80211_eht_gi
+ * @uhr.elr: UHR ELR MCS was used
+ * @uhr.im: UHR interference mitigation was used
* @rx_flags: internal RX flags for mac80211
* @ampdu_reference: A-MPDU reference number, must be a different value for
* each A-MPDU but the same for each subframe within one A-MPDU
@@ -1671,6 +1682,12 @@ struct ieee80211_rx_status {
u8 ru:4;
u8 gi:2;
} eht;
+ struct {
+ u8 ru:4;
+ u8 gi:2;
+ u8 elr:1;
+ u8 im:1;
+ } uhr;
};
u8 rate_idx;
u8 nss;
@@ -1901,6 +1918,31 @@ enum ieee80211_offload_flags {
};
/**
+ * struct ieee80211_eml_params - EML Operating mode notification parameters
+ *
+ * EML Operating mode notification parameters received in the Operating mode
+ * notification frame. This struct is used as a container to pass the info to
+ * the underlying driver.
+ *
+ * @link_id: the link ID where the Operating mode notification frame has been
+ * received.
+ * @control: EML control field defined in P802.11be section 9.4.1.76.
+ * @link_bitmap: eMLSR/eMLMR enabled links defined in P802.11be
+ * section 9.4.1.76.
+ * @emlmr_mcs_map_count: number of valid @emlmr_mcs_map_bw entries according
+ * to P802.11be section 9.4.1.76 (valid if eMLMR mode control bit is set).
+ * @emlmr_mcs_map_bw: eMLMR supported MCS and NSS set subfields defined in
+ * P802.11be section 9.4.1.76 (valid if eMLMR mode control bit is set).
+ */
+struct ieee80211_eml_params {
+ u8 link_id;
+ u8 control;
+ u16 link_bitmap;
+ u8 emlmr_mcs_map_count;
+ u8 emlmr_mcs_map_bw[9];
+};
+
+/**
* struct ieee80211_vif_cfg - interface configuration
* @assoc: association status
* @ibss_joined: indicates whether this station is part of an IBSS or not
@@ -2432,6 +2474,7 @@ struct ieee80211_sta_aggregates {
* @he_cap: HE capabilities of this STA
* @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities
* @eht_cap: EHT capabilities of this STA
+ * @uhr_cap: UHR capabilities of this STA
* @s1g_cap: S1G capabilities of this STA
* @agg: per-link data for multi-link aggregation
* @bandwidth: current bandwidth the station can receive with
@@ -2455,6 +2498,7 @@ struct ieee80211_link_sta {
struct ieee80211_sta_he_cap he_cap;
struct ieee80211_he_6ghz_capa he_6ghz_capa;
struct ieee80211_sta_eht_cap eht_cap;
+ struct ieee80211_sta_uhr_cap uhr_cap;
struct ieee80211_sta_s1g_cap s1g_cap;
struct ieee80211_sta_aggregates agg;
@@ -2518,6 +2562,7 @@ struct ieee80211_link_sta {
* by the AP.
* @valid_links: bitmap of valid links, or 0 for non-MLO
* @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not.
+ * @epp_peer: indicates that the peer is an EPP peer.
*/
struct ieee80211_sta {
u8 addr[ETH_ALEN] __aligned(2);
@@ -2542,6 +2587,7 @@ struct ieee80211_sta {
struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1];
u16 valid_links;
+ bool epp_peer;
struct ieee80211_link_sta deflink;
struct ieee80211_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
@@ -4509,6 +4555,9 @@ struct ieee80211_prep_tx_info {
* interface with the specified type would be added, and thus drivers that
* implement this callback need to handle such cases. The type is the full
* &enum nl80211_iftype.
+ * @set_eml_op_mode: Configure eMLSR/eMLMR operation mode in the underlying
+ * driver according to the parameters received in the EML Operating Mode
+ * Notification frame.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
@@ -4904,6 +4953,10 @@ struct ieee80211_ops {
struct ieee80211_neg_ttlm *ttlm);
void (*prep_add_interface)(struct ieee80211_hw *hw,
enum nl80211_iftype type);
+ int (*set_eml_op_mode)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta,
+ struct ieee80211_eml_params *eml_params);
};
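For illustration, a minimal sketch of a driver wiring up the new
set_eml_op_mode callback. Everything prefixed my_ is hypothetical; only
struct ieee80211_eml_params and the ieee80211_ops hook come from this patch.

static int my_set_eml_op_mode(struct ieee80211_hw *hw,
			      struct ieee80211_vif *vif,
			      struct ieee80211_sta *sta,
			      struct ieee80211_eml_params *eml_params)
{
	struct my_priv *priv = hw->priv;	/* hypothetical driver data */

	/* control carries the EML control field (P802.11be 9.4.1.76),
	 * link_bitmap the eMLSR/eMLMR enabled links for this MLD peer
	 */
	return my_fw_send_eml_cmd(priv, eml_params->link_id,
				  eml_params->control,
				  eml_params->link_bitmap);
}

static const struct ieee80211_ops my_ops = {
	/* ... other callbacks ... */
	.set_eml_op_mode = my_set_eml_op_mode,
};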
/**
@@ -6270,6 +6323,30 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
struct ieee80211_vif *vif),
void *data);
+struct ieee80211_vif *
+__ieee80211_iterate_interfaces(struct ieee80211_hw *hw,
+ struct ieee80211_vif *prev,
+ u32 iter_flags);
+
+/**
+ * for_each_interface - iterate interfaces under wiphy mutex
+ * @vif: the iterator variable
+ * @hw: the HW to iterate for
+ * @flags: the iteration flags, see &enum ieee80211_interface_iteration_flags
+ */
+#define for_each_interface(vif, hw, flags) \
+ for (vif = __ieee80211_iterate_interfaces(hw, NULL, flags); \
+ vif; \
+ vif = __ieee80211_iterate_interfaces(hw, vif, flags))
+
+/**
+ * for_each_active_interface - iterate active interfaces under wiphy mutex
+ * @vif: the iterator variable
+ * @hw: the HW to iterate for
+ */
+#define for_each_active_interface(vif, hw) \
+ for_each_interface(vif, hw, IEEE80211_IFACE_ITER_ACTIVE)
+
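A hedged usage sketch of the new iteration macros: the caller must hold the
wiphy mutex, which the lockdep assertion below documents (my_check_vif is
hypothetical):

struct ieee80211_vif *vif;

lockdep_assert_wiphy(hw->wiphy);

for_each_active_interface(vif, hw)
	my_check_vif(vif);	/* hypothetical per-interface work */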
/**
* ieee80211_iterate_active_interfaces_mtx - iterate active interfaces
*
@@ -6282,12 +6359,18 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
* @iterator: the iterator function to call, cannot sleep
* @data: first argument of the iterator function
*/
-void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw,
- u32 iter_flags,
- void (*iterator)(void *data,
- u8 *mac,
- struct ieee80211_vif *vif),
- void *data);
+static inline void
+ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw,
+ u32 iter_flags,
+ void (*iterator)(void *data, u8 *mac,
+ struct ieee80211_vif *vif),
+ void *data)
+{
+ struct ieee80211_vif *vif;
+
+ for_each_interface(vif, hw, iter_flags | IEEE80211_IFACE_ITER_ACTIVE)
+ iterator(data, vif->addr, vif);
+}
/**
* ieee80211_iterate_stations_atomic - iterate stations
@@ -6306,6 +6389,20 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw,
struct ieee80211_sta *sta),
void *data);
+struct ieee80211_sta *
+__ieee80211_iterate_stations(struct ieee80211_hw *hw,
+ struct ieee80211_sta *prev);
+
+/**
+ * for_each_station - iterate stations under wiphy mutex
+ * @sta: the iterator variable
+ * @hw: the HW to iterate for
+ */
+#define for_each_station(sta, hw) \
+ for (sta = __ieee80211_iterate_stations(hw, NULL); \
+ sta; \
+ sta = __ieee80211_iterate_stations(hw, sta))
+
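Correspondingly, a sketch for the station iterator (again under the wiphy
mutex; my_update_sta is hypothetical):

struct ieee80211_sta *sta;

lockdep_assert_wiphy(hw->wiphy);

for_each_station(sta, hw)
	my_update_sta(sta);	/* hypothetical per-station work */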
/**
* ieee80211_iterate_stations_mtx - iterate stations
*
@@ -6318,10 +6415,17 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw,
* @iterator: the iterator function to call
* @data: first argument of the iterator function
*/
-void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw,
- void (*iterator)(void *data,
- struct ieee80211_sta *sta),
- void *data);
+static inline void
+ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw,
+ void (*iterator)(void *data,
+ struct ieee80211_sta *sta),
+ void *data)
+{
+ struct ieee80211_sta *sta;
+
+ for_each_station(sta, hw)
+ iterator(data, sta);
+}
/**
* ieee80211_queue_work - add work onto the mac80211 workqueue
@@ -7221,7 +7325,7 @@ ieee80211_get_he_6ghz_capa_vif(const struct ieee80211_supported_band *sband,
}
/**
- * ieee80211_get_eht_iftype_cap_vif - return ETH capabilities for sband/vif
+ * ieee80211_get_eht_iftype_cap_vif - return EHT capabilities for sband/vif
* @sband: the sband to search for the iftype on
* @vif: the vif to get the iftype from
*
@@ -7235,6 +7339,20 @@ ieee80211_get_eht_iftype_cap_vif(const struct ieee80211_supported_band *sband,
}
/**
+ * ieee80211_get_uhr_iftype_cap_vif - return UHR capabilities for sband/vif
+ * @sband: the sband to search for the iftype on
+ * @vif: the vif to get the iftype from
+ *
+ * Return: pointer to the struct ieee80211_sta_uhr_cap, or %NULL if none found
+ */
+static inline const struct ieee80211_sta_uhr_cap *
+ieee80211_get_uhr_iftype_cap_vif(const struct ieee80211_supported_band *sband,
+ struct ieee80211_vif *vif)
+{
+ return ieee80211_get_uhr_iftype_cap(sband, ieee80211_vif_type_p2p(vif));
+}
+
+/**
* ieee80211_update_mu_groups - set the VHT MU-MIMO group data
*
* @vif: the specified virtual interface
@@ -7289,7 +7407,9 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif,
* @band: the band to transmit on
* @sta: optional pointer to get the station to send the frame to
*
- * Return: %true if the skb was prepared, %false otherwise
+ * Return: %true if the skb was prepared, %false otherwise.
+ * On failure, the skb is freed by this function; callers must not
+ * free it again.
*
* Note: must be called under RCU lock
*/
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 57df78cfbf82..766f4fb25e26 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -35,6 +35,8 @@ enum gdma_request_type {
GDMA_CREATE_MR = 31,
GDMA_DESTROY_MR = 32,
GDMA_QUERY_HWC_TIMEOUT = 84, /* 0x54 */
+ GDMA_ALLOC_DM = 96, /* 0x60 */
+ GDMA_DESTROY_DM = 97, /* 0x61 */
};
#define GDMA_RESOURCE_DOORBELL_PAGE 27
@@ -382,6 +384,10 @@ struct gdma_irq_context {
char name[MANA_IRQ_NAME_SZ];
};
+enum gdma_context_flags {
+ GC_PROBE_SUCCEEDED = 0,
+};
+
struct gdma_context {
struct device *dev;
struct dentry *mana_pci_debugfs;
@@ -430,6 +436,8 @@ struct gdma_context {
u64 pf_cap_flags1;
struct workqueue_struct *service_wq;
+
+ unsigned long flags;
};
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
@@ -486,6 +494,8 @@ struct gdma_wqe {
#define INLINE_OOB_SMALL_SIZE 8
#define INLINE_OOB_LARGE_SIZE 24
+#define MANA_MAX_TX_WQE_SGL_ENTRIES 30
+
#define MAX_TX_WQE_SIZE 512
#define MAX_RX_WQE_SIZE 256
@@ -591,6 +601,20 @@ enum {
/* Driver can self reset on FPGA Reconfig EQE notification */
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
+/* Driver detects stalled send queues and recovers them */
+#define GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY BIT(18)
+
+#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
+
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
+/* Driver can send HWC periodically to query stats */
+#define GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY BIT(21)
+
+/* Driver can handle hardware recovery events during probe */
+#define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -599,7 +623,12 @@ enum {
GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
- GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
+ GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
+ GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \
+ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \
+ GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \
+ GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY)
#define GDMA_DRV_CAP_FLAGS2 0
@@ -839,6 +868,8 @@ enum gdma_mr_type {
GDMA_MR_TYPE_GVA = 2,
/* Guest zero-based address MRs */
GDMA_MR_TYPE_ZBVA = 4,
+ /* Device address MRs */
+ GDMA_MR_TYPE_DM = 5,
};
struct gdma_create_mr_params {
@@ -854,6 +885,12 @@ struct gdma_create_mr_params {
u64 dma_region_handle;
enum gdma_mr_access_flags access_flags;
} zbva;
+ struct {
+ u64 dm_handle;
+ u64 offset;
+ u64 length;
+ enum gdma_mr_access_flags access_flags;
+ } da;
};
};
@@ -868,13 +905,23 @@ struct gdma_create_mr_request {
u64 dma_region_handle;
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
- } gva;
+ } __packed gva;
struct {
u64 dma_region_handle;
enum gdma_mr_access_flags access_flags;
- } zbva;
- };
+ } __packed zbva;
+ struct {
+ u64 dm_handle;
+ u64 offset;
+ enum gdma_mr_access_flags access_flags;
+ } __packed da;
+ } __packed;
u32 reserved_2;
+ union {
+ struct {
+ u64 length;
+ } da_ext;
+ };
};/* HW DATA */
struct gdma_create_mr_response {
@@ -893,6 +940,27 @@ struct gdma_destroy_mr_response {
struct gdma_resp_hdr hdr;
};/* HW DATA */
+struct gdma_alloc_dm_req {
+ struct gdma_req_hdr hdr;
+ u64 length;
+ u32 alignment;
+ u32 flags;
+}; /* HW DATA */
+
+struct gdma_alloc_dm_resp {
+ struct gdma_resp_hdr hdr;
+ u64 dm_handle;
+}; /* HW DATA */
+
+struct gdma_destroy_dm_req {
+ struct gdma_req_hdr hdr;
+ u64 dm_handle;
+}; /* HW DATA */
+
+struct gdma_destroy_dm_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
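As a sketch of how the new request/response pairs would be used, assuming
the existing mana_gd_init_req_hdr()/mana_gd_send_request() helpers (their
use here is an assumption for illustration, not part of this patch):

static int my_alloc_dm(struct gdma_context *gc, u64 length, u32 alignment,
		       u64 *dm_handle)
{
	struct gdma_alloc_dm_resp resp = {};
	struct gdma_alloc_dm_req req = {};
	int err;

	/* existing mana helpers; this caller is hypothetical */
	mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOC_DM, sizeof(req),
			     sizeof(resp));
	req.length = length;
	req.alignment = alignment;

	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
	if (err || resp.hdr.status)
		return err ? err : -EPROTO;

	*dm_handle = resp.dm_handle;
	return 0;
}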
int mana_gd_verify_vf_version(struct pci_dev *pdev);
int mana_gd_register_device(struct gdma_dev *gd);
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 83cf93338eb3..16feb39616c1 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -24,6 +24,8 @@
#define HWC_INIT_DATA_PF_DEST_CQ_ID 11
#define HWC_DATA_CFG_HWC_TIMEOUT 1
+#define HWC_DATA_HW_LINK_CONNECT 2
+#define HWC_DATA_HW_LINK_DISCONNECT 3
#define HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS 30000
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0921485565c0..a078af283bdd 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -375,6 +375,14 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
+ u64 tx_cqe_err;
+ u64 tx_cqe_unknown_type;
+ u64 tx_linear_pkt_cnt;
+ u64 rx_coalesced_err;
+ u64 rx_cqe_unknown_type;
+};
+
+struct mana_ethtool_hc_stats {
u64 hc_rx_discards_no_wqe;
u64 hc_rx_err_vport_disabled;
u64 hc_rx_bytes;
@@ -402,10 +410,6 @@ struct mana_ethtool_stats {
u64 hc_tx_mcast_pkts;
u64 hc_tx_mcast_bytes;
u64 hc_tx_err_gdma;
- u64 tx_cqe_err;
- u64 tx_cqe_unknown_type;
- u64 rx_coalesced_err;
- u64 rx_cqe_unknown_type;
};
struct mana_ethtool_phy_stats {
@@ -473,15 +477,25 @@ struct mana_context {
u16 num_ports;
u8 bm_hostmode;
+ struct mana_ethtool_hc_stats hc_stats;
struct mana_eq *eqs;
struct dentry *mana_eqs_debugfs;
+ struct workqueue_struct *per_port_queue_reset_wq;
+ /* Workqueue for querying hardware stats */
+ struct delayed_work gf_stats_work;
+ bool hwc_timeout_occurred;
struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
+
+ /* Link state change work */
+ struct work_struct link_change_work;
+ u32 link_event;
};
struct mana_port_context {
struct mana_context *ac;
struct net_device *ndev;
+ struct work_struct queue_reset_work;
u8 mac_addr[ETH_ALEN];
@@ -573,13 +587,14 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
-void mana_query_gf_stats(struct mana_port_context *apc);
+int mana_query_gf_stats(struct mana_context *ac);
int mana_query_link_cfg(struct mana_port_context *apc);
int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
int enable_clamping);
void mana_query_phy_stats(struct mana_port_context *apc);
int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
extern struct dentry *mana_debugfs_root;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 4a30bd458c5a..2dfee6d4258a 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -92,15 +92,17 @@ struct neigh_parms {
static inline void neigh_var_set(struct neigh_parms *p, int index, int val)
{
set_bit(index, p->data_state);
- p->data[index] = val;
+ WRITE_ONCE(p->data[index], val);
}
-#define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr])
+#define __NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr])
+#define NEIGH_VAR(p, attr) READ_ONCE(__NEIGH_VAR(p, attr))
+#define NEIGH_VAR_PTR(p, attr) (&(__NEIGH_VAR(p, attr)))
/* In ndo_neigh_setup, NEIGH_VAR_INIT should be used.
* In other cases, NEIGH_VAR_SET should be used.
*/
-#define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val)
+#define NEIGH_VAR_INIT(p, attr, val) (__NEIGH_VAR(p, attr) = val)
#define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val)
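A short sketch of the intended read/write split, using the existing
DELAY_PROBE_TIME attribute as an arbitrary example:

int delay = NEIGH_VAR(p, DELAY_PROBE_TIME);	/* lockless read via READ_ONCE() */

NEIGH_VAR_SET(p, DELAY_PROBE_TIME, new_delay);	/* marks user-set, pairs with WRITE_ONCE() */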
static inline void neigh_parms_data_state_setall(struct neigh_parms *p)
@@ -236,7 +238,7 @@ struct neigh_table {
atomic_t gc_entries;
struct list_head gc_list;
struct list_head managed_list;
- rwlock_t lock;
+ spinlock_t lock;
unsigned long last_rand;
struct neigh_statistics __percpu *stats;
struct neigh_hash_table __rcu *nht;
@@ -378,6 +380,13 @@ struct net *neigh_parms_net(const struct neigh_parms *parms)
unsigned long neigh_rand_reach_time(unsigned long base);
+static inline void neigh_set_reach_time(struct neigh_parms *p)
+{
+ unsigned long base = NEIGH_VAR(p, BASE_REACHABLE_TIME);
+
+ WRITE_ONCE(p->reachable_time, neigh_rand_reach_time(base));
+}
+
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct sk_buff *skb);
struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index cb664f6e3558..d7bec49ee9ea 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -37,6 +37,7 @@
#include <net/netns/smc.h>
#include <net/netns/bpf.h>
#include <net/netns/mctp.h>
+#include <net/netns/vsock.h>
#include <net/net_trackers.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
@@ -120,6 +121,7 @@ struct net {
* it is critical that it is on a read_mostly cache line.
*/
u32 hash_mix;
+ bool is_dying;
struct net_device *loopback_dev; /* The loopback */
@@ -196,6 +198,9 @@ struct net {
/* Move to a better place when the config guard is removed. */
struct mutex rtnl_mutex;
#endif
+#if IS_ENABLED(CONFIG_VSOCKETS)
+ struct netns_vsock vsock;
+#endif
} __randomize_layout;
#include <linux/seq_file_net.h>
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index cd00e0406cf4..95ed28212f4e 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -14,6 +14,10 @@ struct netdev_config {
u8 hds_config;
};
+struct netdev_queue_config {
+ u32 rx_page_size;
+};
+
/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
u64 bytes;
@@ -111,6 +115,11 @@ void netdev_stat_queue_sum(struct net_device *netdev,
int tx_start, int tx_end,
struct netdev_queue_stats_tx *tx_sum);
+enum {
+ /* The queue checks and honours the page size qcfg parameter */
+ QCFG_RX_PAGE_SIZE = 0x1,
+};
+
/**
* struct netdev_queue_mgmt_ops - netdev ops for queue management
*
@@ -130,27 +139,52 @@ void netdev_stat_queue_sum(struct net_device *netdev,
* @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used
* for this queue. Return NULL on error.
*
+ * @ndo_default_qcfg: (Optional) Populate queue config struct with defaults.
+ * Queue config structs are passed to this helper before
+ * the user-requested settings are applied.
+ *
+ * @ndo_validate_qcfg: (Optional) Check if queue config is supported.
+ * Called when configuration affecting a queue may be
+ * changing, either due to NIC-wide config, or config
+ * scoped to the queue at a specified index.
+ * When NIC-wide config is changed, the callback will
+ * be invoked for all queues.
+ *
+ * @supported_params: Bitmask of supported parameters, see QCFG_*.
+ *
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
* the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
* be called for an interface which is open.
*/
struct netdev_queue_mgmt_ops {
- size_t ndo_queue_mem_size;
- int (*ndo_queue_mem_alloc)(struct net_device *dev,
- void *per_queue_mem,
- int idx);
- void (*ndo_queue_mem_free)(struct net_device *dev,
- void *per_queue_mem);
- int (*ndo_queue_start)(struct net_device *dev,
- void *per_queue_mem,
- int idx);
- int (*ndo_queue_stop)(struct net_device *dev,
- void *per_queue_mem,
- int idx);
- struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
- int idx);
+ size_t ndo_queue_mem_size;
+ int (*ndo_queue_mem_alloc)(struct net_device *dev,
+ struct netdev_queue_config *qcfg,
+ void *per_queue_mem,
+ int idx);
+ void (*ndo_queue_mem_free)(struct net_device *dev,
+ void *per_queue_mem);
+ int (*ndo_queue_start)(struct net_device *dev,
+ struct netdev_queue_config *qcfg,
+ void *per_queue_mem,
+ int idx);
+ int (*ndo_queue_stop)(struct net_device *dev,
+ void *per_queue_mem,
+ int idx);
+ void (*ndo_default_qcfg)(struct net_device *dev,
+ struct netdev_queue_config *qcfg);
+ int (*ndo_validate_qcfg)(struct net_device *dev,
+ struct netdev_queue_config *qcfg,
+ struct netlink_ext_ack *extack);
+ struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
+ int idx);
+
+ unsigned int supported_params;
};
+void netdev_queue_config(struct net_device *dev, int rxq,
+ struct netdev_queue_config *qcfg);
+
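A hedged sketch of a driver opting into the new parameter (names prefixed
my_ are hypothetical; the accepted page sizes are an example policy):

static int my_validate_qcfg(struct net_device *dev,
			    struct netdev_queue_config *qcfg,
			    struct netlink_ext_ack *extack)
{
	/* example driver-specific policy: accept only 4K or 64K pages */
	if (qcfg->rx_page_size != SZ_4K && qcfg->rx_page_size != SZ_64K) {
		NL_SET_ERR_MSG(extack, "unsupported rx page size");
		return -EINVAL;
	}
	return 0;
}

static const struct netdev_queue_mgmt_ops my_queue_mgmt_ops = {
	/* ... alloc/free/start/stop callbacks ... */
	.ndo_validate_qcfg	= my_validate_qcfg,
	.supported_params	= QCFG_RX_PAGE_SIZE,
};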
bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
/**
@@ -310,6 +344,17 @@ static inline void netif_subqueue_sent(const struct net_device *dev,
netdev_tx_sent_queue(txq, bytes);
}
+static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq)
+{
+ unsigned long trans_start = READ_ONCE(txq->trans_start);
+
+ if (netif_xmit_stopped(txq) &&
+ time_after(jiffies, trans_start + txq->dev->watchdog_timeo))
+ return jiffies_to_msecs(jiffies - trans_start);
+
+ return 0;
+}
+
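For example, a driver's watchdog or service task might use the helper like
this (a sketch; the log message is illustrative only):

unsigned int stalled_ms = netif_xmit_timeout_ms(txq);

if (stalled_ms)
	netdev_warn(dev, "TX queue stopped for %u ms, scheduling reset\n",
		    stalled_ms);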
#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
({ \
struct netdev_queue *_txq; \
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index 8cdcd138b33f..cfa72c485387 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -7,6 +7,7 @@
#include <linux/sysfs.h>
#include <net/xdp.h>
#include <net/page_pool/types.h>
+#include <net/netdev_queues.h>
/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
@@ -27,6 +28,7 @@ struct netdev_rx_queue {
struct xsk_buff_pool *pool;
#endif
struct napi_struct *napi;
+ struct netdev_queue_config qcfg;
struct pp_memory_provider_params mp_params;
} ____cacheline_aligned_in_smp;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index aa0a7c82199e..bc42dd0e10e6 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -16,6 +16,7 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
+#include <net/netns/generic.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_sctp.h>
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 1b58b5b91ff6..cf0166520cf3 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -13,20 +13,20 @@ struct nf_conncount_list {
u32 last_gc; /* jiffies at most recent gc */
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
+ unsigned int last_gc_count; /* length of list at most recent gc */
};
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);
void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data);
-unsigned int nf_conncount_count(struct net *net,
- struct nf_conncount_data *data,
- const u32 *key,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone);
+unsigned int nf_conncount_count_skb(struct net *net,
+ const struct sk_buff *skb,
+ u16 l3num,
+ struct nf_conncount_data *data,
+ const u32 *key);
-int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone);
+int nf_conncount_add_skb(struct net *net, const struct sk_buff *skb,
+ u16 l3num, struct nf_conncount_list *list);
void nf_conncount_list_init(struct nf_conncount_list *list);
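A sketch of the new skb-based API, assuming an IPv4 caller that obtained
@data from nf_conncount_init() (my_over_limit is hypothetical):

static bool my_over_limit(struct net *net, const struct sk_buff *skb,
			  struct nf_conncount_data *data, const u32 *key,
			  unsigned int limit)
{
	/* the tuple and zone are now taken from the skb's conntrack
	 * entry rather than passed in by the caller
	 */
	return nf_conncount_count_skb(net, skb, NFPROTO_IPV4, data,
				      key) > limit;
}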
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6929f8daf1ed..cd5020835a6d 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -30,7 +30,7 @@ struct nf_conntrack_l4proto {
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
- /* convert protoinfo to nfnetink attributes */
+ /* convert protoinfo to nfnetlink attributes */
int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
struct nf_conn *ct, bool destroy);
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index f7dd950ff250..4d55b7325707 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -11,7 +11,7 @@
#ifndef _NF_CONNTRACK_TUPLE_H
#define _NF_CONNTRACK_TUPLE_H
-#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <linux/list_nulls.h>
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index c003cd194fa2..b09c11c048d5 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -107,6 +107,19 @@ enum flow_offload_xmit_type {
#define NF_FLOW_TABLE_ENCAP_MAX 2
+struct flow_offload_tunnel {
+ union {
+ struct in_addr src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ struct in_addr dst_v4;
+ struct in6_addr dst_v6;
+ };
+
+ u8 l3_proto;
+};
+
struct flow_offload_tuple {
union {
struct in_addr src_v4;
@@ -130,22 +143,25 @@ struct flow_offload_tuple {
__be16 proto;
} encap[NF_FLOW_TABLE_ENCAP_MAX];
+ struct flow_offload_tunnel tun;
+
/* All members above are keys for lookups, see flow_offload_hash(). */
struct { } __hash;
u8 dir:2,
xmit_type:3,
encap_num:2,
+ tun_num:2,
in_vlan_ingress:2;
u16 mtu;
union {
struct {
struct dst_entry *dst_cache;
+ u32 ifidx;
u32 dst_cookie;
};
struct {
u32 ifidx;
- u32 hw_ifidx;
u8 h_source[ETH_ALEN];
u8 h_dest[ETH_ALEN];
} out;
@@ -206,7 +222,9 @@ struct nf_flow_route {
u16 id;
__be16 proto;
} encap[NF_FLOW_TABLE_ENCAP_MAX];
+ struct flow_offload_tunnel tun;
u8 num_encaps:2,
+ num_tuns:2,
ingress_vlans:2;
} in;
struct {
@@ -222,6 +240,12 @@ struct nf_flow_route {
struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
void flow_offload_free(struct flow_offload *flow);
+struct nft_flowtable;
+struct nft_pktinfo;
+int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
+ struct nft_flowtable *ft);
+
static inline int
nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
flow_setup_cb_t *cb, void *cb_priv)
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 4aeffddb7586..45eb26b2e95b 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -6,11 +6,13 @@
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/netfilter.h>
+#include <linux/rhashtable-types.h>
#include <linux/skbuff.h>
/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
struct list_head list;
+ struct rhash_head hash_node;
struct sk_buff *skb;
unsigned int id;
unsigned int hook_index; /* index in hook_entries->hook[] */
@@ -19,7 +21,9 @@ struct nf_queue_entry {
struct net_device *physout;
#endif
struct nf_hook_state state;
+ bool nf_ct_is_unconfirmed;
u16 size; /* sizeof(entry) + saved route keys */
+ u16 queue_num;
/* extra space to store route keys */
};
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index fab7dc73f738..ec8a8ec9c0aa 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -6,7 +6,6 @@
#include <linux/list.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/u64_stats_sync.h>
#include <linux/rhashtable.h>
@@ -317,11 +316,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
* @NFT_ITER_UNSPEC: unspecified, to catch errors
* @NFT_ITER_READ: read-only iteration over set elements
* @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element
*/
enum nft_iter_type {
NFT_ITER_UNSPEC,
NFT_ITER_READ,
NFT_ITER_UPDATE,
+ NFT_ITER_UPDATE_CLONE,
};
struct nft_set;
@@ -452,6 +453,7 @@ struct nft_set_ext;
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
* @gc_init: initialize garbage collection
+ * @abort_skip_removal: skip removal of elements from abort path
* @elemsize: element private size
*
* Operations lookup, update and delete have simpler interfaces, are faster
@@ -509,6 +511,7 @@ struct nft_set_ops {
const struct nft_set *set);
void (*gc_init)(const struct nft_set *set);
+ bool abort_skip_removal;
unsigned int elemsize;
};
@@ -871,6 +874,8 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
u64 timeout, u64 expiration, gfp_t gfp);
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[]);
+void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
+ struct nft_set_elem_expr *elem_expr);
void nft_set_elem_destroy(const struct nft_set *set,
const struct nft_elem_priv *elem_priv,
bool destroy_expr);
@@ -1091,6 +1096,29 @@ struct nft_rule_blob {
__attribute__((aligned(__alignof__(struct nft_rule_dp))));
};
+enum nft_chain_types {
+ NFT_CHAIN_T_DEFAULT = 0,
+ NFT_CHAIN_T_ROUTE,
+ NFT_CHAIN_T_NAT,
+ NFT_CHAIN_T_MAX
+};
+
+/**
+ * struct nft_chain_validate_state - validation state
+ *
+ * If a chain is encountered again during table validation, it is
+ * possible to avoid revalidation provided the calling context is
+ * compatible. This structure stores the relevant calling context of
+ * previous validations.
+ *
+ * @hook_mask: the hook numbers and locations the chain is linked to
+ * @depth: the deepest call chain level the chain is linked to
+ */
+struct nft_chain_validate_state {
+ u8 hook_mask[NFT_CHAIN_T_MAX];
+ u8 depth;
+};
+
/**
* struct nft_chain - nf_tables chain
*
@@ -1109,6 +1137,7 @@ struct nft_rule_blob {
* @udlen: user data length
* @udata: user data in the chain
* @blob_next: rule blob pointer to the next in the chain
+ * @vstate: validation state
*/
struct nft_chain {
struct nft_rule_blob __rcu *blob_gen_0;
@@ -1128,9 +1157,10 @@ struct nft_chain {
/* Only used during control plane commit phase: */
struct nft_rule_blob *blob_next;
+ struct nft_chain_validate_state vstate;
};
-int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
+int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain);
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv);
@@ -1138,13 +1168,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
-enum nft_chain_types {
- NFT_CHAIN_T_DEFAULT = 0,
- NFT_CHAIN_T_ROUTE,
- NFT_CHAIN_T_NAT,
- NFT_CHAIN_T_MAX
-};
-
/**
* struct nft_chain_type - nf_tables chain type info
*
@@ -1838,6 +1861,11 @@ struct nft_trans_gc {
struct rcu_head rcu;
};
+static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
static inline void nft_ctx_update(struct nft_ctx *ctx,
const struct nft_trans *trans)
{
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index a0633eeaec97..c53ac00bb974 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -42,7 +42,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
if (ip6h->version != 6)
return -1;
- pkt_len = ntohs(ip6h->payload_len);
+ pkt_len = ipv6_payload_len(pkt->skb, ip6h);
skb_len = pkt->skb->len - skb_network_offset(pkt->skb);
if (pkt_len + sizeof(*ip6h) > skb_len)
return -1;
@@ -86,7 +86,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt)
if (ip6h->version != 6)
goto inhdr_error;
- pkt_len = ntohs(ip6h->payload_len);
+ pkt_len = ipv6_payload_len(pkt->skb, ip6h);
if (pkt_len + sizeof(*ip6h) > pkt->skb->len) {
idev = __in6_dev_get(nft_in(pkt));
__IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS);
diff --git a/include/net/netmem.h b/include/net/netmem.h
index f7dacc9e75fd..a96b3e5e5574 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -68,10 +68,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
enum net_iov_type {
NET_IOV_DMABUF,
NET_IOV_IOURING,
-
- /* Force size to unsigned long to make the NET_IOV_ASSERTS below pass.
- */
- NET_IOV_MAX = ULONG_MAX
};
/* A memory descriptor representing abstract networking I/O vectors,
@@ -247,6 +243,23 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
return page_to_pfn(netmem_to_page(netmem));
}
+/* XXX: How to extract netmem_desc from page must be changed, once
+ * netmem_desc no longer overlays on page and will be allocated through
+ * slab.
+ */
+#define __pp_page_to_nmdesc(p) (_Generic((p), \
+ const struct page * : (const struct netmem_desc *)(p), \
+ struct page * : (struct netmem_desc *)(p)))
+
+/* CAUTION: Check if the page is a pp page before calling this helper or
+ * know it's a pp page.
+ */
+#define pp_page_to_nmdesc(p) \
+({ \
+ DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \
+ __pp_page_to_nmdesc(p); \
+})
+
/**
* __netmem_to_nmdesc - unsafely get pointer to the &netmem_desc backing
* @netmem
@@ -265,42 +278,25 @@ static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
return (__force struct netmem_desc *)netmem;
}
-/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
- * common fields.
- * @netmem: netmem reference to extract as net_iov.
- *
- * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic,
- * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access
- * these fields without a type check to make sure that the underlying mem is
- * net_iov or page.
+/* netmem_to_nmdesc - convert netmem_ref to struct netmem_desc * for
+ * access to common fields.
+ * @netmem: netmem reference to get netmem_desc.
*
- * The resulting value of this function can only be used to access the fields
- * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in
- * undefined behavior.
+ * All the sub types of netmem_ref (netmem_desc, net_iov) have the same
+ * pp, pp_magic, dma_addr, and pp_ref_count fields via netmem_desc.
*
- * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
+ * Return: the pointer to struct netmem_desc * regardless of its
+ * underlying type.
*/
-static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
+static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
{
- return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
-}
+ void *p = (void *)((__force unsigned long)netmem & ~NET_IOV);
-/* XXX: How to extract netmem_desc from page must be changed, once
- * netmem_desc no longer overlays on page and will be allocated through
- * slab.
- */
-#define __pp_page_to_nmdesc(p) (_Generic((p), \
- const struct page * : (const struct netmem_desc *)(p), \
- struct page * : (struct netmem_desc *)(p)))
+ if (netmem_is_net_iov(netmem))
+ return &((struct net_iov *)p)->desc;
-/* CAUTION: Check if the page is a pp page before calling this helper or
- * know it's a pp page.
- */
-#define pp_page_to_nmdesc(p) \
-({ \
- DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \
- __pp_page_to_nmdesc(p); \
-})
+ return __pp_page_to_nmdesc((struct page *)p);
+}
/**
* __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
@@ -320,12 +316,12 @@ static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
{
- return __netmem_clear_lsb(netmem)->pp;
+ return netmem_to_nmdesc(netmem)->pp;
}
static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
{
- return &__netmem_clear_lsb(netmem)->pp_ref_count;
+ return &netmem_to_nmdesc(netmem)->pp_ref_count;
}
static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
@@ -390,11 +386,39 @@ static inline bool netmem_is_pfmemalloc(netmem_ref netmem)
static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
{
- return __netmem_clear_lsb(netmem)->dma_addr;
+ return netmem_to_nmdesc(netmem)->dma_addr;
}
-void get_netmem(netmem_ref netmem);
-void put_netmem(netmem_ref netmem);
+#if defined(CONFIG_NET_DEVMEM)
+static inline bool net_is_devmem_iov(const struct net_iov *niov)
+{
+ return niov->type == NET_IOV_DMABUF;
+}
+#else
+static inline bool net_is_devmem_iov(const struct net_iov *niov)
+{
+ return false;
+}
+#endif
+
+void __get_netmem(netmem_ref netmem);
+void __put_netmem(netmem_ref netmem);
+
+static __always_inline void get_netmem(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ __get_netmem(netmem);
+ else
+ get_page(netmem_to_page(netmem));
+}
+
+static __always_inline void put_netmem(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ __put_netmem(netmem);
+ else
+ put_page(netmem_to_page(netmem));
+}
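Callers are unchanged; the new inline fast path merely dispatches on the
underlying type, e.g. (sketch):

get_netmem(netmem);	/* page: get_page(); net_iov: __get_netmem() */
/* ... hold the reference across async work ... */
put_netmem(netmem);	/* page: put_page(); net_iov: __put_netmem() */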
#define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \
do { \
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 9b36f0ff0c20..9ef3d70e5e9c 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -13,9 +13,11 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ int sysctl_txq_reselection;
int sysctl_optmem_max;
u8 sysctl_txrehash;
u8 sysctl_tstamp_allow_data;
+ u8 sysctl_bypass_prot_mem;
#ifdef CONFIG_PROC_FS
struct prot_inuse __percpu *prot_inuse;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 34eb3aecb3f2..8e971c7bf164 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -74,19 +74,26 @@ struct netns_ipv4 {
/* TXRX readonly hotpath cache lines */
__cacheline_group_begin(netns_ipv4_read_txrx);
- u8 sysctl_tcp_moderate_rcvbuf;
__cacheline_group_end(netns_ipv4_read_txrx);
/* RX readonly hotpath cache line */
__cacheline_group_begin(netns_ipv4_read_rx);
+ u8 sysctl_tcp_moderate_rcvbuf;
u8 sysctl_ip_early_demux;
u8 sysctl_tcp_early_demux;
u8 sysctl_tcp_l3mdev_accept;
/* 3 bytes hole, try to pack */
int sysctl_tcp_reordering;
int sysctl_tcp_rmem[3];
+ int sysctl_tcp_rcvbuf_low_rtt;
__cacheline_group_end(netns_ipv4_read_rx);
+ /* ICMP rate limiter hot cache line. */
+ __cacheline_group_begin_aligned(icmp);
+ atomic_t icmp_global_credit;
+ u32 icmp_global_stamp;
+ __cacheline_group_end_aligned(icmp);
+
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;
@@ -135,12 +142,12 @@ struct netns_ipv4 {
u8 sysctl_icmp_echo_ignore_broadcasts;
u8 sysctl_icmp_ignore_bogus_error_responses;
u8 sysctl_icmp_errors_use_inbound_ifaddr;
+ u8 sysctl_icmp_errors_extension_mask;
int sysctl_icmp_ratelimit;
int sysctl_icmp_ratemask;
int sysctl_icmp_msgs_per_sec;
int sysctl_icmp_msgs_burst;
- atomic_t icmp_global_credit;
- u32 icmp_global_stamp;
+
u32 ip_rt_min_pmtu;
int ip_rt_mtu_expires;
int ip_rt_min_advmss;
@@ -220,6 +227,7 @@ struct netns_ipv4 {
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
unsigned int sysctl_tcp_child_ehash_entries;
+ int sysctl_tcp_comp_sack_rtt_percent;
unsigned long sysctl_tcp_comp_sack_delay_ns;
unsigned long sysctl_tcp_comp_sack_slack_ns;
int sysctl_max_syn_backlog;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 47dc70d8100a..34bdb1308e8f 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,19 +30,23 @@ struct netns_sysctl_ipv6 {
int ip6_rt_min_advmss;
u32 multipath_hash_fields;
u8 multipath_hash_policy;
- u8 bindv6only;
+
+ __cacheline_group_begin(sysctl_ipv6_flowlabel);
u8 flowlabel_consistency;
u8 auto_flowlabels;
- int icmpv6_time;
+ u8 flowlabel_state_ranges;
+ __cacheline_group_end(sysctl_ipv6_flowlabel);
+
u8 icmpv6_echo_ignore_all;
u8 icmpv6_echo_ignore_multicast;
u8 icmpv6_echo_ignore_anycast;
+ int icmpv6_time;
DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1);
unsigned long *icmpv6_ratemask_ptr;
u8 anycast_src_echo_reply;
+ u8 bindv6only;
u8 ip_nonlocal_bind;
u8 fwmark_reflect;
- u8 flowlabel_state_ranges;
int idgen_retries;
int idgen_delay;
int flowlabel_reflect;
@@ -56,6 +60,7 @@ struct netns_sysctl_ipv6 {
u8 skip_notify_on_dev_down;
u8 fib_notify_on_flag_change;
u8 icmpv6_error_anycast_as_unicast;
+ u8 icmpv6_errors_extension_mask;
};
struct netns_ipv6 {
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index 19ad2574b267..6682e51513ef 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -16,6 +16,7 @@ struct netns_mpls {
int default_ttl;
size_t platform_labels;
struct mpls_route __rcu * __rcu *platform_label;
+ struct mutex platform_mutex;
struct ctl_table_header *ctl;
};
diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
index fc752a50f91b..ed24c9f638ee 100644
--- a/include/net/netns/smc.h
+++ b/include/net/netns/smc.h
@@ -17,6 +17,9 @@ struct netns_smc {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *smc_hdr;
#endif
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+ struct smc_hs_ctrl __rcu *hs_ctrl;
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type;
int sysctl_smcr_testlink_time;
@@ -24,5 +27,7 @@ struct netns_smc {
int sysctl_rmem;
int sysctl_max_links_per_lgr;
int sysctl_max_conns_per_lgr;
+ unsigned int sysctl_smcr_max_send_wr;
+ unsigned int sysctl_smcr_max_recv_wr;
};
#endif
diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h
new file mode 100644
index 000000000000..dc8cbe45f406
--- /dev/null
+++ b/include/net/netns/vsock.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_NET_NAMESPACE_VSOCK_H
+#define __NET_NET_NAMESPACE_VSOCK_H
+
+#include <linux/types.h>
+
+enum vsock_net_mode {
+ VSOCK_NET_MODE_GLOBAL,
+ VSOCK_NET_MODE_LOCAL,
+};
+
+struct netns_vsock {
+ struct ctl_table_header *sysctl_hdr;
+
+ /* protected by the vsock_table_lock in af_vsock.c */
+ u32 port;
+
+ enum vsock_net_mode mode;
+ enum vsock_net_mode child_ns_mode;
+
+ /* 0 = unlocked, 1 = locked to global, 2 = locked to local */
+ int child_ns_mode_locked;
+};
+#endif /* __NET_NET_NAMESPACE_VSOCK_H */
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 127e6c7d910d..c54df042db6b 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -219,6 +219,8 @@ static inline void nfc_free_device(struct nfc_dev *dev)
int nfc_register_device(struct nfc_dev *dev);
+void nfc_unregister_rfkill(struct nfc_dev *dev);
+void nfc_remove_device(struct nfc_dev *dev);
void nfc_unregister_device(struct nfc_dev *dev);
/**
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index a994dea74596..442822746e92 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -191,14 +191,12 @@ enum nl802154_iftype {
* @NL802154_CAP_ATTR_CHANNELS: a nested attribute for nl802154_channel_attr
* @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for
* nl802154_wpan_phy_tx_power
- * @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level
- * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maximum value for cca_ed_level
* @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags
* @NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags
* @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value
* @NL802154_CAP_ATTR_MAX_MINBE: maximum of minbe value
* @NL802154_CAP_ATTR_MIN_MAXBE: minimum of maxbe value
- * @NL802154_CAP_ATTR_MAX_MINBE: maximum of maxbe value
+ * @NL802154_CAP_ATTR_MAX_MAXBE: maximum of maxbe value
* @NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS: minimum of csma backoff value
* @NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS: maximum of csma backoffs value
* @NL802154_CAP_ATTR_MIN_FRAME_RETRIES: minimum of frame retries value
@@ -364,6 +362,7 @@ enum nl802154_cca_opts {
NL802154_CCA_OPT_ENERGY_CARRIER_AND,
NL802154_CCA_OPT_ENERGY_CARRIER_OR,
+ /* private: */
/* keep last */
__NL802154_CCA_OPT_ATTR_AFTER_LAST,
NL802154_CCA_OPT_ATTR_MAX = __NL802154_CCA_OPT_ATTR_AFTER_LAST - 1
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 1509a536cb85..cdd95477af7a 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -161,6 +161,7 @@ struct memory_provider_ops;
struct pp_memory_provider_params {
void *mp_priv;
const struct memory_provider_ops *mp_ops;
+ u32 rx_page_size;
};
struct page_pool {
@@ -246,7 +247,7 @@ struct page_pool {
/* User-facing fields, protected by page_pools_lock */
struct {
struct hlist_node list;
- u64 detach_time;
+ ktime_t detach_time;
u32 id;
} user;
};
diff --git a/include/net/phy/realtek_phy.h b/include/net/phy/realtek_phy.h
new file mode 100644
index 000000000000..d683bc1b0659
--- /dev/null
+++ b/include/net/phy/realtek_phy.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _REALTEK_PHY_H
+#define _REALTEK_PHY_H
+
+#define PHY_ID_RTL_DUMMY_SFP 0x001ccbff
+
+#endif /* _REALTEK_PHY_H */
diff --git a/include/net/ping.h b/include/net/ping.h
index 9634b8800814..05bfd594a64c 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -58,7 +58,7 @@ void ping_unhash(struct sock *sk);
int ping_init_sock(struct sock *sk);
void ping_close(struct sock *sk, long timeout);
-int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+int ping_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
void ping_err(struct sk_buff *skb, int offset, u32 info);
int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd,
struct sk_buff *);
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 8a75c73fc555..18a419cd9d94 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -25,11 +25,6 @@ struct qdisc_walker {
const struct Qdisc * : (const void *)&q->privdata, \
struct Qdisc * : (void *)&q->privdata)
-static inline struct Qdisc *qdisc_from_priv(void *priv)
-{
- return container_of(priv, struct Qdisc, privdata);
-}
-
/*
Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth
@@ -48,7 +43,6 @@ static inline struct Qdisc *qdisc_from_priv(void *priv)
*/
typedef u64 psched_time_t;
-typedef long psched_tdiff_t;
/* Avoid doing 64 bit divide */
#define PSCHED_SHIFT 6
@@ -120,12 +114,13 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
void __qdisc_run(struct Qdisc *q);
-static inline void qdisc_run(struct Qdisc *q)
+static inline struct sk_buff *qdisc_run(struct Qdisc *q)
{
if (qdisc_run_begin(q)) {
__qdisc_run(q);
- qdisc_run_end(q);
+ return qdisc_run_end(q);
}
+ return NULL;
}
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -313,4 +308,28 @@ static inline unsigned int qdisc_peek_len(struct Qdisc *sch)
return len;
}
+static inline void qdisc_lock_init(struct Qdisc *sch,
+ const struct Qdisc_ops *ops)
+{
+ spin_lock_init(&sch->q.lock);
+
+ /* Skip dynamic keys if nesting is not possible */
+ if (ops->static_flags & TCQ_F_INGRESS ||
+ ops == &noqueue_qdisc_ops)
+ return;
+
+ lockdep_register_key(&sch->root_lock_key);
+ lockdep_set_class(&sch->q.lock, &sch->root_lock_key);
+}
+
+static inline void qdisc_lock_uninit(struct Qdisc *sch,
+ const struct Qdisc_ops *ops)
+{
+ if (ops->static_flags & TCQ_F_INGRESS ||
+ ops == &noqueue_qdisc_ops)
+ return;
+
+ lockdep_unregister_key(&sch->root_lock_key);
+}
+
#endif
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
index 8e91a8fa31b5..ad6d703ce6fe 100644
--- a/include/net/proto_memory.h
+++ b/include/net/proto_memory.h
@@ -35,6 +35,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_sk_under_memory_pressure(sk))
return true;
+ if (sk->sk_bypass_prot_mem)
+ return false;
+
return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
diff --git a/include/net/psp/types.h b/include/net/psp/types.h
index 31cee64b7c86..25a9096d4e7d 100644
--- a/include/net/psp/types.h
+++ b/include/net/psp/types.h
@@ -59,6 +59,10 @@ struct psp_dev_config {
* device key
* @stale_assocs: associations which use a rotated out key
*
+ * @stats: statistics maintained by the core
+ * @stats.rotations: See stats attr key-rotations
+ * @stats.stales: See stats attr stale-events
+ *
* @rcu: RCU head for freeing the structure
*/
struct psp_dev {
@@ -81,6 +85,11 @@ struct psp_dev {
struct list_head prev_assocs;
struct list_head stale_assocs;
+ struct {
+ unsigned long rotations;
+ unsigned long stales;
+ } stats;
+
struct rcu_head rcu;
};
@@ -141,6 +150,22 @@ struct psp_assoc {
u8 drv_data[] __aligned(8);
};
+struct psp_dev_stats {
+ union {
+ struct {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_auth_fail;
+ u64 rx_error;
+ u64 rx_bad;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_error;
+ };
+ DECLARE_FLEX_ARRAY(u64, required);
+ };
+};
+
/**
* struct psp_dev_ops - netdev driver facing PSP callbacks
*/
@@ -179,6 +204,13 @@ struct psp_dev_ops {
* Remove an association from the device.
*/
void (*tx_key_del)(struct psp_dev *psd, struct psp_assoc *pas);
+
+ /**
+ * @get_stats: get statistics from the device
+ * Stats required by the spec must be maintained and filled in.
+ * Stats must be filled in member-by-member, never memset the struct.
+ */
+ void (*get_stats)(struct psp_dev *psd, struct psp_dev_stats *stats);
};
#endif /* __NET_PSP_H */
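A hedged sketch of a driver @get_stats callback honoring the
member-by-member rule (all my_/MY_ identifiers are hypothetical):

static void my_psp_get_stats(struct psp_dev *psd,
			     struct psp_dev_stats *stats)
{
	struct my_dev *mdev = my_psd_to_dev(psd);	/* hypothetical */

	/* required stats must all be filled in; optional ones the device
	 * does not keep are left untouched rather than zeroed
	 */
	stats->rx_packets   = my_hw_counter(mdev, MY_PSP_RX_PKTS);
	stats->rx_bytes     = my_hw_counter(mdev, MY_PSP_RX_BYTES);
	stats->rx_auth_fail = my_hw_counter(mdev, MY_PSP_RX_AUTH_FAIL);
	stats->rx_error     = my_hw_counter(mdev, MY_PSP_RX_ERR);
	stats->rx_bad       = my_hw_counter(mdev, MY_PSP_RX_BAD);
	stats->tx_packets   = my_hw_counter(mdev, MY_PSP_TX_PKTS);
	stats->tx_bytes     = my_hw_counter(mdev, MY_PSP_TX_BYTES);
	stats->tx_error     = my_hw_counter(mdev, MY_PSP_TX_ERR);
}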
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index cd4d4cf71d0d..5a9c826a7092 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -36,7 +36,6 @@ struct request_sock_ops {
struct sk_buff *skb,
enum sk_rst_reason reason);
void (*destructor)(struct request_sock *req);
- void (*syn_ack_timeout)(const struct request_sock *req);
};
struct saved_syn {
@@ -124,14 +123,7 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb,
return sk;
}
-static inline void __reqsk_free(struct request_sock *req)
-{
- req->rsk_ops->destructor(req);
- if (req->rsk_listener)
- sock_put(req->rsk_listener);
- kfree(req->saved_syn);
- kmem_cache_free(req->rsk_ops->slab, req);
-}
+void __reqsk_free(struct request_sock *req);
static inline void reqsk_free(struct request_sock *req)
{
@@ -197,8 +189,6 @@ struct request_sock_queue {
*/
};
-void reqsk_queue_alloc(struct request_sock_queue *queue);
-
void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
bool reset);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 738cd5b13c62..c3d657359a3d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -41,13 +41,6 @@ enum qdisc_state_t {
__QDISC_STATE_DRAINING,
};
-enum qdisc_state2_t {
- /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
- * Use qdisc_run_begin/end() or qdisc_is_running() instead.
- */
- __QDISC_STATE2_RUNNING,
-};
-
#define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED)
#define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING)
@@ -95,6 +88,8 @@ struct Qdisc {
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
+#define TCQ_F_DEQUEUE_DROPS 0x400 /* ->dequeue() can drop packets in q->to_free */
+
u32 limit;
const struct Qdisc_ops *ops;
struct qdisc_size_table __rcu *stab;
@@ -110,20 +105,30 @@ struct Qdisc {
int pad;
refcount_t refcnt;
- /*
- * For performance sake on SMP, we put highly modified fields at the end
- */
- struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
- struct qdisc_skb_head q;
- struct gnet_stats_basic_sync bstats;
- struct gnet_stats_queue qstats;
- int owner;
- unsigned long state;
- unsigned long state2; /* must be written under qdisc spinlock */
- struct Qdisc *next_sched;
- struct sk_buff_head skb_bad_txq;
-
- spinlock_t busylock ____cacheline_aligned_in_smp;
+ /* Cache line potentially dirtied in dequeue() or __netif_reschedule(). */
+ __cacheline_group_begin(Qdisc_read_mostly) ____cacheline_aligned;
+ struct sk_buff_head gso_skb;
+ struct Qdisc *next_sched;
+ struct sk_buff_head skb_bad_txq;
+ __cacheline_group_end(Qdisc_read_mostly);
+
+ /* Fields dirtied in dequeue() fast path. */
+ __cacheline_group_begin(Qdisc_write) ____cacheline_aligned;
+ struct qdisc_skb_head q;
+ unsigned long state;
+ struct gnet_stats_basic_sync bstats;
+ bool running; /* must be written under qdisc spinlock */
+
+ /* Note : we only change qstats.backlog in fast path. */
+ struct gnet_stats_queue qstats;
+
+ struct sk_buff *to_free;
+ __cacheline_group_end(Qdisc_write);
+
+ atomic_long_t defer_count ____cacheline_aligned_in_smp;
+ struct llist_head defer_list;
+
spinlock_t seqlock;
struct rcu_head rcu;
@@ -168,7 +173,7 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK)
return spin_is_locked(&qdisc->seqlock);
- return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+ return READ_ONCE(qdisc->running);
}
static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@@ -211,11 +216,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
*/
return spin_trylock(&qdisc->seqlock);
}
- return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+ if (READ_ONCE(qdisc->running))
+ return false;
+ WRITE_ONCE(qdisc->running, true);
+ return true;
}
-static inline void qdisc_run_end(struct Qdisc *qdisc)
+static inline struct sk_buff *qdisc_run_end(struct Qdisc *qdisc)
{
+ struct sk_buff *to_free = NULL;
+
if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
@@ -228,9 +238,16 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
if (unlikely(test_bit(__QDISC_STATE_MISSED,
&qdisc->state)))
__netif_schedule(qdisc);
- } else {
- __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+ return NULL;
}
+
+ if (qdisc->flags & TCQ_F_DEQUEUE_DROPS) {
+ to_free = qdisc->to_free;
+ if (to_free)
+ qdisc->to_free = NULL;
+ }
+ WRITE_ONCE(qdisc->running, false);
+ return to_free;
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -432,13 +449,16 @@ struct tcf_proto {
};
struct qdisc_skb_cb {
- struct {
- unsigned int pkt_len;
- u16 slave_dev_queue_mapping;
- u16 tc_classid;
- };
+ unsigned int pkt_len;
+ u16 pkt_segs;
+ u16 tc_classid;
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
+
+ u16 slave_dev_queue_mapping;
+ u8 post_ct:1;
+ u8 post_ct_snat:1;
+ u8 post_ct_dnat:1;
};
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
@@ -696,6 +716,34 @@ void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_put(struct Qdisc *qdisc);
void qdisc_put_unlocked(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
+
+static inline void dev_reset_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+{
+ struct Qdisc *qdisc;
+ bool nolock;
+
+ qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
+ if (!qdisc)
+ return;
+
+ nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ qdisc_reset(qdisc);
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+ spin_unlock_bh(&qdisc->seqlock);
+ }
+}
+
#ifdef CONFIG_NET_SCHED
int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
void *type_data);
@@ -758,13 +806,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
+ bool nolock;
for (; i < dev->num_tx_queues; i++) {
qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
if (qdisc) {
+ nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+ spin_unlock_bh(&qdisc->seqlock);
+ }
}
}
}
@@ -829,6 +887,15 @@ static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
return qdisc_skb_cb(skb)->pkt_len;
}
+static inline unsigned int qdisc_pkt_segs(const struct sk_buff *skb)
+{
+ u32 pkt_segs = qdisc_skb_cb(skb)->pkt_segs;
+
+ DEBUG_NET_WARN_ON_ONCE(pkt_segs !=
+ (skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1));
+ return pkt_segs;
+}
+
/* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */
enum net_xmit_qdisc_t {
__NET_XMIT_STOLEN = 0x00010000,
@@ -870,9 +937,7 @@ static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
const struct sk_buff *skb)
{
- _bstats_update(bstats,
- qdisc_pkt_len(skb),
- skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
+ _bstats_update(bstats, qdisc_pkt_len(skb), qdisc_pkt_segs(skb));
}
static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
@@ -1067,11 +1132,8 @@ struct tc_skb_cb {
struct qdisc_skb_cb qdisc_cb;
u32 drop_reason;
- u16 zone; /* Only valid if post_ct = true */
+ u16 zone; /* Only valid if qdisc_skb_cb(skb)->post_ct = true */
u16 mru;
- u8 post_ct:1;
- u8 post_ct_snat:1;
- u8 post_ct_dnat:1;
};
static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
@@ -1094,6 +1156,28 @@ static inline void tcf_set_drop_reason(const struct sk_buff *skb,
tc_skb_cb(skb)->drop_reason = reason;
}
+static inline void tcf_kfree_skb_list(struct sk_buff *skb)
+{
+ while (unlikely(skb)) {
+ struct sk_buff *next = skb->next;
+
+ prefetch(next);
+ kfree_skb_reason(skb, tcf_get_drop_reason(skb));
+ skb = next;
+ }
+}
+
+static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
+ enum skb_drop_reason reason)
+{
+ DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS));
+ DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK);
+
+ tcf_set_drop_reason(skb, reason);
+ skb->next = q->to_free;
+ q->to_free = skb;
+}
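A sketch of the intended flow: a qdisc with TCQ_F_DEQUEUE_DROPS queues its
drops from ->dequeue(), and the caller frees them once the run context is
released (my_skb_expired and the label are hypothetical):

/* inside a ->dequeue() handler, root qdisc lock held */
if (my_skb_expired(sch, skb)) {		/* hypothetical expiry check */
	qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED);
	goto next_packet;		/* hypothetical: try the next skb */
}

/* in the caller, e.g. __dev_xmit_skb(), after the root lock is released */
struct sk_buff *to_free = qdisc_run(q);

tcf_kfree_skb_list(to_free);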
+
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
@@ -1373,6 +1457,11 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
struct tcf_block *block);
+static inline bool mini_qdisc_pair_inited(struct mini_Qdisc_pair *miniqp)
+{
+ return !!miniqp->p_miniq;
+}
+
void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
diff --git a/include/net/sch_priv.h b/include/net/sch_priv.h
new file mode 100644
index 000000000000..4789f668ae87
--- /dev/null
+++ b/include/net/sch_priv.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_SCHED_PRIV_H
+#define __NET_SCHED_PRIV_H
+
+#include <net/sch_generic.h>
+
+struct mq_sched {
+ struct Qdisc **qdiscs;
+};
+
+int mq_init_common(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack,
+ const struct Qdisc_ops *qdisc_ops);
+void mq_destroy_common(struct Qdisc *sch);
+void mq_attach(struct Qdisc *sch);
+void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb);
+struct netdev_queue *mq_select_queue(struct Qdisc *sch,
+ struct tcmsg *tcm);
+struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl);
+unsigned long mq_find(struct Qdisc *sch, u32 classid);
+int mq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm);
+int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d);
+void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg);
+
+#endif
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index 3d5879e08e78..6f2cd562b1de 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -72,7 +72,6 @@ struct sctp_shared_key *sctp_auth_get_shkey(
int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep,
struct sctp_association *asoc,
gfp_t gfp);
-int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp);
const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id);
const struct sctp_hmac *
sctp_auth_asoc_get_hmac(const struct sctp_association *asoc);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index e96d1bd087f6..58242b37b47a 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -85,7 +85,7 @@ void sctp_udp_sock_stop(struct net *net);
/*
* sctp/socket.c
*/
-int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
+int sctp_inet_connect(struct socket *sock, struct sockaddr_unsized *uaddr,
int addr_len, int flags);
int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
int sctp_inet_listen(struct socket *sock, int backlog);
@@ -94,8 +94,7 @@ void sctp_data_ready(struct sock *sk);
__poll_t sctp_poll(struct file *file, struct socket *sock,
poll_table *wait);
void sctp_sock_rfree(struct sk_buff *skb);
-void sctp_copy_sock(struct sock *newsk, struct sock *sk,
- struct sctp_association *asoc);
+
extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *);
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index 8034bf5febbe..77806ef1cb70 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -52,10 +52,10 @@ void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch);
void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch);
int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
-struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream);
+const struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream);
void sctp_sched_ops_register(enum sctp_sched_type sched,
- struct sctp_sched_ops *sched_ops);
+ const struct sctp_sched_ops *sched_ops);
void sctp_sched_ops_prio_init(void);
void sctp_sched_ops_rr_init(void);
void sctp_sched_ops_fc_init(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 2ae390219efd..affee44bd38e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -228,10 +228,6 @@ struct sctp_sock {
atomic_t pd_mode;
- /* Fields after this point will be skipped on copies, like on accept
- * and peeloff operations
- */
-
/* Receive to here while partial delivery is in effect. */
struct sk_buff_head pd_lobby;
@@ -497,9 +493,6 @@ struct sctp_pf {
int (*bind_verify) (struct sctp_sock *, union sctp_addr *);
int (*send_verify) (struct sctp_sock *, union sctp_addr *);
int (*supported_addrs)(const struct sctp_sock *, __be16 *);
- struct sock *(*create_accept_sk) (struct sock *sk,
- struct sctp_association *asoc,
- bool kern);
int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
@@ -1076,7 +1069,7 @@ struct sctp_outq {
struct list_head out_chunk_list;
/* Stream scheduler being used */
- struct sctp_sched_ops *sched;
+ const struct sctp_sched_ops *sched;
unsigned int out_qlen; /* Total length of queued data chunks. */
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index cddebafb9f77..6f996229167b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -5,16 +5,47 @@
#include <linux/types.h>
struct net;
+extern struct net init_net;
+
+union tcp_seq_and_ts_off {
+ struct {
+ u32 seq;
+ u32 ts_off;
+ };
+ u64 hash64;
+};
u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
-u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport);
-u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
-u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport);
-u32 secure_tcpv6_ts_off(const struct net *net,
- const __be32 *saddr, const __be32 *daddr);
+union tcp_seq_and_ts_off
+secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport);
+
+static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport)
+{
+ union tcp_seq_and_ts_off ts;
+
+ ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr,
+ sport, dport);
+
+ return ts.seq;
+}
+
+union tcp_seq_and_ts_off
+secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr,
+ const __be32 *daddr,
+ __be16 sport, __be16 dport);
+
+static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport)
+{
+ union tcp_seq_and_ts_off ts;
+
+ ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr,
+ sport, dport);
+ return ts.seq;
+}
#endif /* _NET_SECURE_SEQ */
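A hedged sketch of a consumer of the new combined helper; the wrapper and its name are illustrative, not part of this patch:

	static void toy_init_seq_and_ts(const struct net *net,
					const struct sk_buff *skb,
					u32 *seq, u32 *ts_off)
	{
		const struct iphdr *iph = ip_hdr(skb);
		const struct tcphdr *th = tcp_hdr(skb);
		union tcp_seq_and_ts_off sts;

		/* One hash computation yields both the ISN and the TS offset */
		sts = secure_tcp_seq_and_ts_off(net, iph->saddr, iph->daddr,
						th->source, th->dest);
		*seq = sts.seq;
		*ts_off = sts.ts_off;
	}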
diff --git a/include/net/selftests.h b/include/net/selftests.h
index e65e8d230d33..c36e07406ad4 100644
--- a/include/net/selftests.h
+++ b/include/net/selftests.h
@@ -3,9 +3,48 @@
#define _NET_SELFTESTS
#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+
+struct net_packet_attrs {
+ const unsigned char *src;
+ const unsigned char *dst;
+ u32 ip_src;
+ u32 ip_dst;
+ bool tcp;
+ u16 sport;
+ u16 dport;
+ int timeout;
+ int size;
+ int max_size;
+ u8 id;
+ u16 queue_mapping;
+ bool bad_csum;
+};
+
+struct net_test_priv {
+ struct net_packet_attrs *packet;
+ struct packet_type pt;
+ struct completion comp;
+ int double_vlan;
+ int vlan_id;
+ int ok;
+};
+
+struct netsfhdr {
+ __be32 version;
+ __be64 magic;
+ u8 id;
+} __packed;
+
+#define NET_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+ sizeof(struct netsfhdr))
+#define NET_TEST_PKT_MAGIC 0xdeadcafecafedeadULL
+#define NET_LB_TIMEOUT msecs_to_jiffies(200)
#if IS_ENABLED(CONFIG_NET_SELFTESTS)
+struct sk_buff *net_test_get_skb(struct net_device *ndev, u8 id,
+ struct net_packet_attrs *attr);
void net_selftest(struct net_device *ndev, struct ethtool_test *etest,
u64 *buf);
int net_selftest_get_count(void);
@@ -13,6 +52,12 @@ void net_selftest_get_strings(u8 *data);
#else
+static inline struct sk_buff *net_test_get_skb(struct net_device *ndev, u8 id,
+ struct net_packet_attrs *attr)
+{
+ return NULL;
+}
+
static inline void net_selftest(struct net_device *ndev, struct ethtool_test *etest,
u64 *buf)
{
diff --git a/include/net/smc.h b/include/net/smc.h
index 08bee529ed8d..bfdc4c41f019 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -17,6 +17,8 @@
#include <linux/wait.h>
#include <linux/dibs.h>
+struct tcp_sock;
+struct inet_request_sock;
struct sock;
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
@@ -50,4 +52,55 @@ struct smcd_dev {
u8 going_away : 1;
};
+#define SMC_HS_CTRL_NAME_MAX 16
+
+enum {
+	/* ops can be inherited from init_net */
+ SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,
+
+ SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
+};
+
+struct smc_hs_ctrl {
+ /* private */
+
+ struct list_head list;
+ struct module *owner;
+
+ /* public */
+
+ /* unique name */
+ char name[SMC_HS_CTRL_NAME_MAX];
+ int flags;
+
+	/* Invoked before computing the SMC option for SYN packets.
+	 * The return value controls whether the SMC option is set:
+	 * return 0 to disable SMC, or any other value to enable it.
+ */
+ int (*syn_option)(struct tcp_sock *tp);
+
+	/* Invoked before setting up SMC options for SYN-ACK packets.
+	 * The return value controls whether to respond with SMC options:
+	 * return 0 to disable SMC, or any other value to enable it.
+ */
+ int (*synack_option)(const struct tcp_sock *tp,
+ struct inet_request_sock *ireq);
+};
+
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+#define smc_call_hsbpf(init_val, tp, func, ...) ({ \
+ typeof(init_val) __ret = (init_val); \
+ struct smc_hs_ctrl *ctrl; \
+ rcu_read_lock(); \
+ ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \
+ if (ctrl && ctrl->func) \
+ __ret = ctrl->func(tp, ##__VA_ARGS__); \
+ rcu_read_unlock(); \
+ __ret; \
+})
+#else
+#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); })
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
+
#endif /* _SMC_H */
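A short usage sketch under stated assumptions: the call site below is made up, and the init value of 1 keeps SMC enabled when no handshake control is registered.

	static bool toy_smc_want_syn_option(struct tcp_sock *tp)
	{
		/* Falls back to the init value (1, SMC enabled) when no
		 * smc_hs_ctrl is registered or it lacks a syn_option hook.
		 */
		return smc_call_hsbpf(1, tp, syn_option) != 0;
	}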
diff --git a/include/net/sock.h b/include/net/sock.h
index 60bcb13f045c..6c9a83016e95 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -118,6 +118,7 @@ typedef __u64 __bitwise __addrpair;
* @skc_reuseport: %SO_REUSEPORT setting
* @skc_ipv6only: socket is IPV6 only
* @skc_net_refcnt: socket is using net ref counting
+ * @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
@@ -174,6 +175,7 @@ struct sock_common {
unsigned char skc_reuseport:1;
unsigned char skc_ipv6only:1;
unsigned char skc_net_refcnt:1;
+ unsigned char skc_bypass_prot_mem:1;
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
@@ -303,6 +305,8 @@ struct sk_filter;
* @sk_txrehash: enable TX hash rethink
* @sk_filter: socket filtering instructions
* @sk_timer: sock cleanup timer
+ * @tcp_retransmit_timer: tcp retransmit timer
+ * @mptcp_retransmit_timer: mptcp retransmit timer
* @sk_stamp: time stamp of last packet received
* @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
* @sk_tsflags: SO_TIMESTAMPING flags
@@ -313,6 +317,7 @@ struct sk_filter;
* @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock
* for timestamping
* @sk_tskey: counter to disambiguate concurrent tstamp requests
+ * @sk_tx_queue_mapping_jiffies: time in jiffies of last @sk_tx_queue_mapping refresh.
* @sk_zckey: counter to order MSG_ZEROCOPY notifications
* @sk_socket: Identd and reporting IO signals
* @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock.
@@ -336,6 +341,7 @@ struct sk_filter;
* @sk_reuseport_cb: reuseport group container
* @sk_bpf_storage: ptr to cache and control for bpf_sk_storage
* @sk_rcu: used during RCU grace period
+ * @sk_freeptr: used for SLAB_TYPESAFE_BY_RCU managed sockets
* @sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
@@ -380,6 +386,7 @@ struct sock {
#define sk_reuseport __sk_common.skc_reuseport
#define sk_ipv6only __sk_common.skc_ipv6only
#define sk_net_refcnt __sk_common.skc_net_refcnt
+#define sk_bypass_prot_mem __sk_common.skc_bypass_prot_mem
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
@@ -477,17 +484,21 @@ struct sock {
struct rb_root tcp_rtx_queue;
};
struct sk_buff_head sk_write_queue;
- u32 sk_dst_pending_confirm;
- u32 sk_pacing_status; /* see enum sk_pacing */
struct page_frag sk_frag;
- struct timer_list sk_timer;
-
+ union {
+ struct timer_list sk_timer;
+ struct timer_list tcp_retransmit_timer;
+ struct timer_list mptcp_retransmit_timer;
+ };
unsigned long sk_pacing_rate; /* bytes per second */
atomic_t sk_zckey;
atomic_t sk_tskey;
+ unsigned long sk_tx_queue_mapping_jiffies;
__cacheline_group_end(sock_write_tx);
__cacheline_group_begin(sock_read_tx);
+ u32 sk_dst_pending_confirm;
+ u32 sk_pacing_status; /* see enum sk_pacing */
unsigned long sk_max_pacing_rate;
long sk_sndtimeo;
u32 sk_priority;
@@ -572,7 +583,14 @@ struct sock {
struct bpf_local_storage __rcu *sk_bpf_storage;
#endif
struct numa_drop_counters *sk_drop_counters;
- struct rcu_head sk_rcu;
+	/* Sockets using SLAB_TYPESAFE_BY_RCU can use sk_freeptr.
+	 * By the time kfree() is called, sk_rcu can no longer be in
+	 * use, so its storage may safely be reused.
+	 */
+ union {
+ struct rcu_head sk_rcu;
+ freeptr_t sk_freeptr;
+ };
netns_tracker ns_tracker;
struct xarray sk_user_frags;
@@ -828,11 +846,9 @@ static inline bool sk_del_node_init(struct sock *sk)
{
bool rc = __sk_del_node_init(sk);
- if (rc) {
- /* paranoid for a while -acme */
- WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
+ if (rc)
__sock_put(sk);
- }
+
return rc;
}
#define sk_del_node_init_rcu(sk) sk_del_node_init(sk)
@@ -850,14 +866,25 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
{
bool rc = __sk_nulls_del_node_init_rcu(sk);
- if (rc) {
- /* paranoid for a while -acme */
- WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
+ if (rc)
__sock_put(sk);
- }
+
return rc;
}
+static inline bool sk_nulls_replace_node_init_rcu(struct sock *old,
+ struct sock *new)
+{
+ if (sk_hashed(old)) {
+ hlist_nulls_replace_init_rcu(&old->sk_nulls_node,
+ &new->sk_nulls_node);
+ __sock_put(old);
+ return true;
+ }
+
+ return false;
+}
+
static inline void __sk_add_node(struct sock *sk, struct hlist_head *list)
{
hlist_add_head(&sk->sk_node, list);
@@ -1260,10 +1287,10 @@ struct proto {
void (*close)(struct sock *sk,
long timeout);
int (*pre_connect)(struct sock *sk,
- struct sockaddr *uaddr,
+ struct sockaddr_unsized *uaddr,
int addr_len);
int (*connect)(struct sock *sk,
- struct sockaddr *uaddr,
+ struct sockaddr_unsized *uaddr,
int addr_len);
int (*disconnect)(struct sock *sk, int flags);
@@ -1292,9 +1319,9 @@ struct proto {
size_t len, int flags, int *addr_len);
void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
- struct sockaddr *addr, int addr_len);
+ struct sockaddr_unsized *addr, int addr_len);
int (*bind_add)(struct sock *sk,
- struct sockaddr *addr, int addr_len);
+ struct sockaddr_unsized *addr, int addr_len);
int (*backlog_rcv) (struct sock *sk,
struct sk_buff *skb);
@@ -1349,6 +1376,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
+ unsigned int freeptr_offset;
unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
@@ -1617,6 +1645,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
sk_mem_reclaim(sk);
}
+void __sk_charge(struct sock *sk, gfp_t gfp);
+
#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
static inline void sk_owner_set(struct sock *sk, struct module *owner)
{
@@ -1808,7 +1838,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
void sk_free(struct sock *sk);
void sk_net_refcnt_upgrade(struct sock *sk);
void sk_destruct(struct sock *sk);
-struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
+struct sock *sk_clone(const struct sock *sk, const gfp_t priority, bool lock);
+
+static inline struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
+{
+ return sk_clone(sk, priority, true);
+}
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority);
@@ -1901,8 +1936,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
* Functions to fill in entries in struct proto_ops when a protocol
* does not implement a particular function.
*/
-int sock_no_bind(struct socket *, struct sockaddr *, int);
-int sock_no_connect(struct socket *, struct sockaddr *, int, int);
+int sock_no_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len);
+int sock_no_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *);
int sock_no_getname(struct socket *, struct sockaddr *, int);
@@ -1992,7 +2027,15 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
/* Paired with READ_ONCE() in sk_tx_queue_get() and
* other WRITE_ONCE() because socket lock might be not held.
*/
- WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue);
+ if (READ_ONCE(sk->sk_tx_queue_mapping) != tx_queue) {
+ WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue);
+ WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies);
+ return;
+ }
+
+ /* Refresh sk_tx_queue_mapping_jiffies if too old. */
+ if (time_is_before_jiffies(READ_ONCE(sk->sk_tx_queue_mapping_jiffies) + HZ))
+ WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies);
}
#define NO_QUEUE_MAPPING USHRT_MAX
@@ -2005,19 +2048,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING);
}
-static inline int sk_tx_queue_get(const struct sock *sk)
-{
- if (sk) {
- /* Paired with WRITE_ONCE() in sk_tx_queue_clear()
- * and sk_tx_queue_set().
- */
- int val = READ_ONCE(sk->sk_tx_queue_mapping);
-
- if (val != NO_QUEUE_MAPPING)
- return val;
- }
- return -1;
-}
+int sk_tx_queue_get(const struct sock *sk);
static inline void __sk_rx_queue_set(struct sock *sk,
const struct sk_buff *skb,
@@ -2067,7 +2098,7 @@ static inline int sk_rx_queue_get(const struct sock *sk)
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
- sk->sk_socket = sock;
+ WRITE_ONCE(sk->sk_socket, sock);
if (sock) {
WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
@@ -2303,6 +2334,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
return 0;
}
+#define SK_WMEM_ALLOC_BIAS 1
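+/* sk->sk_wmem_alloc carries one bias reference taken at socket init so
+ * that a socket with in-flight transmit skbs is not freed prematurely;
+ * readers subtract SK_WMEM_ALLOC_BIAS to recover the truly queued amount.
+ */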
/**
* sk_wmem_alloc_get - returns write allocations
* @sk: socket
@@ -2311,7 +2343,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
*/
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) - 1;
+ return refcount_read(&sk->sk_wmem_alloc) - SK_WMEM_ALLOC_BIAS;
}
/**
@@ -2596,12 +2628,16 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
+static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc)
+{
+ return wmem_alloc < (READ_ONCE(sk->sk_sndbuf) >> 1);
+}
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
static inline bool sock_writeable(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
+ return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc));
}
static inline gfp_t gfp_any(void)
@@ -2635,8 +2671,12 @@ static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
#endif /* CONFIG_MEMCG_V1 */
do {
- if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
+ if (time_before64(get_jiffies_64(),
+ mem_cgroup_get_socket_pressure(memcg))) {
+ memcg_memory_event(mem_cgroup_from_sk(sk),
+ MEMCG_SOCK_THROTTLED);
return true;
+ }
} while ((memcg = parent_mem_cgroup(memcg)));
return false;
@@ -3085,7 +3125,7 @@ void sock_set_reuseaddr(struct sock *sk);
void sock_set_reuseport(struct sock *sk);
void sock_set_sndtimeo(struct sock *sk, s64 secs);
-int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
+int sock_bind_add(struct sock *sk, struct sockaddr_unsized *addr, int addr_len);
int sock_get_timeout(long timeo, void *optval, bool old_timeval);
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index c1a67149c6b6..e0fded18e18c 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params {
s32 tcfg_clockid;
size_t num_entries;
struct list_head entries;
+ struct rcu_head rcu;
};
#define GATE_ACT_GATE_OPEN BIT(0)
@@ -39,7 +40,7 @@ struct tcf_gate_params {
struct tcf_gate {
struct tc_action common;
- struct tcf_gate_params param;
+ struct tcf_gate_params __rcu *param;
u8 current_gate_status;
ktime_t current_close_time;
u32 current_entry_octets;
@@ -51,47 +52,65 @@ struct tcf_gate {
#define to_gate(a) ((struct tcf_gate *)a)
+static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a)
+{
+ struct tcf_gate *gact = to_gate(a);
+
+ return rcu_dereference_protected(gact->param,
+ lockdep_is_held(&gact->tcf_lock));
+}
+
static inline s32 tcf_gate_prio(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
s32 tcfg_prio;
- tcfg_prio = to_gate(a)->param.tcfg_priority;
+ p = tcf_gate_params_locked(a);
+ tcfg_prio = p->tcfg_priority;
return tcfg_prio;
}
static inline u64 tcf_gate_basetime(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u64 tcfg_basetime;
- tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+ p = tcf_gate_params_locked(a);
+ tcfg_basetime = p->tcfg_basetime;
return tcfg_basetime;
}
static inline u64 tcf_gate_cycletime(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u64 tcfg_cycletime;
- tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+ p = tcf_gate_params_locked(a);
+ tcfg_cycletime = p->tcfg_cycletime;
return tcfg_cycletime;
}
static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u64 tcfg_cycletimeext;
- tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+ p = tcf_gate_params_locked(a);
+ tcfg_cycletimeext = p->tcfg_cycletime_ext;
return tcfg_cycletimeext;
}
static inline u32 tcf_gate_num_entries(const struct tc_action *a)
{
+ struct tcf_gate_params *p;
u32 num_entries;
- num_entries = to_gate(a)->param.num_entries;
+ p = tcf_gate_params_locked(a);
+ num_entries = p->num_entries;
return num_entries;
}
@@ -105,7 +124,7 @@ static inline struct action_gate_entry
u32 num_entries;
int i = 0;
- p = &to_gate(a)->param;
+ p = tcf_gate_params_locked(a);
num_entries = p->num_entries;
list_for_each_entry(entry, &p->entries, list)
@@ -114,7 +133,7 @@ static inline struct action_gate_entry
if (i != num_entries)
return NULL;
- oe = kcalloc(num_entries, sizeof(*oe), GFP_ATOMIC);
+ oe = kzalloc_objs(*oe, num_entries, GFP_ATOMIC);
if (!oe)
return NULL;
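A hedged sketch of the matching writer side (the real update path lives in net/sched/act_gate.c, not in this header): new parameters are published with rcu_assign_pointer() under tcf_lock, and the old copy is released after a grace period through the rcu head added above. The function name is illustrative.

	static void toy_gate_replace_params(struct tcf_gate *gact,
					    struct tcf_gate_params *new)
	{
		struct tcf_gate_params *old;

		spin_lock_bh(&gact->tcf_lock);
		old = rcu_dereference_protected(gact->param,
						lockdep_is_held(&gact->tcf_lock));
		rcu_assign_pointer(gact->param, new);
		spin_unlock_bh(&gact->tcf_lock);

		if (old)
			kfree_rcu(old, rcu);
	}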
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index c7f24a2da1ca..24d4d5a62b3c 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -13,15 +13,13 @@ struct tcf_ife_params {
u8 eth_src[ETH_ALEN];
u16 eth_type;
u16 flags;
-
+ struct list_head metalist;
struct rcu_head rcu;
};
struct tcf_ife_info {
struct tc_action common;
struct tcf_ife_params __rcu *params;
- /* list of metaids allowed */
- struct list_head metalist;
};
#define to_ife(a) ((struct tcf_ife_info *)a)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ab20f549b8f9..978eea2d5df0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -43,6 +43,7 @@
#include <net/dst.h>
#include <net/mptcp.h>
#include <net/xfrm.h>
+#include <net/secure_seq.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -303,6 +304,9 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk)
mem_cgroup_sk_under_memory_pressure(sk))
return true;
+ if (sk->sk_bypass_prot_mem)
+ return false;
+
return READ_ONCE(tcp_memory_pressure);
}
/*
@@ -344,6 +348,15 @@ extern struct proto tcp_prot;
#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
+/*
+ * TCP splice context
+ */
+struct tcp_splice_state {
+ struct pipe_inode_info *pipe;
+ size_t len;
+ unsigned int flags;
+};
+
void tcp_tsq_work_init(void);
int tcp_v4_err(struct sk_buff *skb, u32);
@@ -375,6 +388,8 @@ void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
void tcp_twsk_purge(struct list_head *net_exit_list);
+int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+ unsigned int offset, size_t len);
ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
@@ -461,6 +476,8 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);
+void tcp_update_pacing_rate(struct sock *sk);
+void tcp_set_rto(struct sock *sk);
void tcp_update_metrics(struct sock *sk);
void tcp_init_metrics(struct sock *sk);
void tcp_metrics_init(void);
@@ -528,14 +545,17 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst,
struct request_sock *req_unhash,
- bool *own_req);
+ bool *own_req,
+ void (*opt_child_init)(struct sock *newsk,
+ const struct sock *sk));
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
-int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
int tcp_connect(struct sock *sk);
enum tcp_synack_type {
TCP_SYNACK_NORMAL,
TCP_SYNACK_FASTOPEN,
TCP_SYNACK_COOKIE,
+ TCP_SYNACK_RETRANS,
};
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
@@ -746,7 +766,15 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_done_with_error(struct sock *sk, int err);
void tcp_reset(struct sock *sk, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
-void tcp_check_space(struct sock *sk);
+void __tcp_check_space(struct sock *sk);
+static inline void tcp_check_space(struct sock *sk)
+{
+ /* pairs with tcp_poll() */
+ smp_mb();
+
+ if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+ __tcp_check_space(sk);
+}
void tcp_sack_compress_send_ack(struct sock *sk);
static inline void tcp_cleanup_skb(struct sk_buff *skb)
@@ -804,6 +832,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
/* tcp.c */
void tcp_get_info(struct sock *, struct tcp_info *);
+void tcp_rate_check_app_limited(struct sock *sk);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
@@ -836,6 +865,14 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
}
+static inline unsigned long tcp_reqsk_timeout(struct request_sock *req)
+{
+ u64 timeout = (u64)req->timeout << req->num_timeout;
+
+ return (unsigned long)min_t(u64, timeout,
+ tcp_rto_max(req->rsk_listener));
+}
+
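The shift implements the usual exponential SYN/ACK backoff: assuming req->timeout starts at TCP_TIMEOUT_INIT (one second) and num_timeout = 3, the raw value is 8 seconds, which is then clamped to tcp_rto_max(req->rsk_listener); the u64 arithmetic avoids overflow for large retry counts.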
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
@@ -1190,7 +1227,15 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED BIT(0)
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN BIT(1)
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Require successfully negotiated AccECN capability */
+#define TCP_CONG_NEEDS_ACCECN BIT(2)
+/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */
+#define TCP_CONG_ECT_1_NEGOTIATION BIT(3)
+/* Cannot fallback to RFC3168 during AccECN negotiation */
+#define TCP_CONG_NO_FALLBACK_RFC3168 BIT(4)
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \
+ TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION | \
+ TCP_CONG_NO_FALLBACK_RFC3168)
union tcp_cc_info;
@@ -1230,12 +1275,27 @@ struct rate_sample {
struct tcp_congestion_ops {
/* fast path fields are put first to fill one cache line */
+	/* A congestion control (CC) must provide either:
+ *
+ * (a) a cong_avoid function, if the CC wants to use the core TCP
+ * stack's default functionality to implement a "classic"
+ * (Reno/CUBIC-style) response to packet loss, RFC3168 ECN,
+ * idle periods, pacing rate computations, etc.
+ *
+ * (b) a cong_control function, if the CC wants custom behavior and
+ * complete control of all congestion control behaviors.
+ */
+ /* (a) "classic" response: calculate new cwnd.
+ */
+ void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
+ /* (b) "custom" response: call when packets are delivered to update
+ * cwnd and pacing rate, after all the ca_state processing.
+ */
+ void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs);
+
/* return slow start threshold (required) */
u32 (*ssthresh)(struct sock *sk);
- /* do new cwnd calculation (required) */
- void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
-
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);
@@ -1248,15 +1308,9 @@ struct tcp_congestion_ops {
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* override sysctl_tcp_min_tso_segs */
+ /* override sysctl_tcp_min_tso_segs (optional) */
u32 (*min_tso_segs)(struct sock *sk);
- /* call when packets are delivered to update cwnd and pacing rate,
- * after all the ca_state processing. (optional)
- */
- void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs);
-
-
/* new value of cwnd after loss (required) */
u32 (*undo_cwnd)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
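A minimal sketch of variant (a), using only the Reno helpers already exported by this header; the module itself is illustrative, not part of this patch:

	static struct tcp_congestion_ops toy_reno __read_mostly = {
		.name		= "toy_reno",
		.owner		= THIS_MODULE,
		.ssthresh	= tcp_reno_ssthresh,	/* required */
		.cong_avoid	= tcp_reno_cong_avoid,	/* variant (a) */
		.undo_cwnd	= tcp_reno_undo_cwnd,	/* required */
	};

A variant (b) module would leave .cong_avoid unset and provide .cong_control instead, taking over cwnd and pacing-rate updates entirely.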
@@ -1322,6 +1376,27 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk)
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}
+static inline bool tcp_ca_needs_accecn(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN;
+}
+
+static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION;
+}
+
+static inline bool tcp_ca_no_fallback_rfc3168(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_NO_FALLBACK_RFC3168;
+}
+
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1333,13 +1408,6 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
/* From tcp_cong.c */
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
-/* From tcp_rate.c */
-void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
-void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
- struct rate_sample *rs);
-void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- bool is_sack_reneg, struct rate_sample *rs);
-void tcp_rate_check_app_limited(struct sock *sk);
static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
{
@@ -1568,8 +1636,14 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
enum skb_drop_reason *reason);
+static inline int tcp_filter(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
+{
+ const struct tcphdr *th = (const struct tcphdr *)skb->data;
+
+ return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th), reason);
+}
-int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason);
void tcp_set_state(struct sock *sk, int state);
void tcp_done(struct sock *sk);
int tcp_abort(struct sock *sk, int err);
@@ -1896,13 +1970,6 @@ struct tcp6_pseudohdr {
__be32 protocol; /* including padding */
};
-union tcp_md5sum_block {
- struct tcp4_pseudohdr ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct tcp6_pseudohdr ip6;
-#endif
-};
-
/*
* struct tcp_sigpool - per-CPU pool of ahash_requests
* @scratch: per-CPU temporary area, that can be used between
@@ -1937,8 +2004,8 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
void tcp_sigpool_end(struct tcp_sigpool *c);
size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len);
/* - functions */
-int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
- const struct sock *sk, const struct sk_buff *skb);
+void tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+ const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen);
@@ -1997,13 +2064,10 @@ static inline void tcp_md5_destruct_sock(struct sock *sk)
}
#endif
-int tcp_md5_alloc_sigpool(void);
-void tcp_md5_release_sigpool(void);
-void tcp_md5_add_sigpool(void);
-extern int tcp_md5_sigpool_id;
-
-int tcp_md5_hash_key(struct tcp_sigpool *hp,
- const struct tcp_md5sig_key *key);
+struct md5_ctx;
+void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb,
+ unsigned int header_len);
+void tcp_md5_hash_key(struct md5_ctx *ctx, const struct tcp_md5sig_key *key);
/* From tcp_fastopen.c */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
@@ -2310,14 +2374,11 @@ void tcp_v4_destroy_sock(struct sock *sk);
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
-struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct tcphdr *th);
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
#ifdef CONFIG_INET
void tcp_gro_complete(struct sk_buff *skb);
#else
@@ -2353,7 +2414,7 @@ struct tcp_sock_af_ops {
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *(*md5_lookup) (const struct sock *sk,
const struct sock *addr_sk);
- int (*calc_md5_hash)(char *location,
+ void (*calc_md5_hash)(char *location,
const struct tcp_md5sig_key *md5,
const struct sock *sk,
const struct sk_buff *skb);
@@ -2381,7 +2442,7 @@ struct tcp_request_sock_ops {
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk,
const struct sock *addr_sk);
- int (*calc_md5_hash) (char *location,
+ void (*calc_md5_hash) (char *location,
const struct tcp_md5sig_key *md5,
const struct sock *sk,
const struct sk_buff *skb);
@@ -2404,8 +2465,9 @@ struct tcp_request_sock_ops {
struct flowi *fl,
struct request_sock *req,
u32 tw_isn);
- u32 (*init_seq)(const struct sk_buff *skb);
- u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
+ union tcp_seq_and_ts_off (*init_seq_and_ts_off)(
+ const struct net *net,
+ const struct sk_buff *skb);
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,
@@ -2511,10 +2573,7 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
u32 reo_wnd);
extern bool tcp_rack_mark_lost(struct sock *sk);
-extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
- u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
-extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
/* tcp_plb.c */
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index f13e5cd2b1ac..e9a933641636 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -29,8 +29,15 @@ enum tcp_accecn_option {
TCP_ACCECN_OPTION_DISABLED = 0,
TCP_ACCECN_OPTION_MINIMUM = 1,
TCP_ACCECN_OPTION_FULL = 2,
+ TCP_ACCECN_OPTION_PERSIST = 3,
};
+/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */
+static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk)
+{
+ __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk));
+}
+
static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
/* Do not set CWR if in AccECN mode! */
@@ -60,12 +67,6 @@ static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}
-/* tp->accecn_fail_mode */
-#define TCP_ACCECN_ACE_FAIL_SEND BIT(0)
-#define TCP_ACCECN_ACE_FAIL_RECV BIT(1)
-#define TCP_ACCECN_OPT_FAIL_SEND BIT(2)
-#define TCP_ACCECN_OPT_FAIL_RECV BIT(3)
-
static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp)
{
return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND;
@@ -91,11 +92,6 @@ static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode)
tp->accecn_fail_mode |= mode;
}
-#define TCP_ACCECN_OPT_NOT_SEEN 0x0
-#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1
-#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2
-#define TCP_ACCECN_OPT_FAIL_SEEN 0x3
-
static inline u8 tcp_accecn_ace(const struct tcphdr *th)
{
return (th->ae << 2) | (th->cwr << 1) | th->ece;
@@ -169,7 +165,9 @@ static inline void tcp_accecn_third_ack(struct sock *sk,
switch (ace) {
case 0x0:
/* Invalid value */
- tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
+ if (!TCP_SKB_CB(skb)->sacked)
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV |
+ TCP_ACCECN_OPT_FAIL_RECV);
break;
case 0x7:
case 0x5:
@@ -398,6 +396,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
tp->received_ce_pending = 0;
__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
__tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
+ tp->accecn_opt_sent_w_dsack = 0;
tp->accecn_minlen = 0;
tp->accecn_opt_demand = 0;
tp->est_ecnfield = 0;
@@ -467,6 +466,26 @@ static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
return TCP_ACCECN_OPT_COUNTER_SEEN;
}
+static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk,
+ const struct sk_buff *skb, u8 dsf)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
+ tp->syn_ect_rcv = dsf & INET_ECN_MASK;
+	/* Demand an Accurate ECN option in response to the SYN on the
+	 * SYN/ACK; the TCP server will then try to send one more packet
+	 * with an AccECN option at a later point during the connection.
+	 */
+ if (tp->rx_opt.accecn &&
+ tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
+
+ tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
+ tp->accecn_opt_demand = 2;
+ }
+}
+
/* See Table 2 of the AccECN draft */
static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
const struct tcphdr *th, u8 ip_dsfield)
@@ -489,32 +508,32 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
break;
case 0x1:
- case 0x5:
/* +========+========+============+=============+
* | A | B | SYN/ACK | Feedback |
* | | | B->A | Mode of A |
* | | | AE CWR ECE | |
* +========+========+============+=============+
- * | AccECN | Nonce | 1 0 1 | (Reserved) |
* | AccECN | ECN | 0 0 1 | Classic ECN |
* | Nonce | AccECN | 0 0 1 | Classic ECN |
* | ECN | AccECN | 0 0 1 | Classic ECN |
* +========+========+============+=============+
*/
- if (tcp_ecn_mode_pending(tp))
- /* Downgrade from AccECN, or requested initially */
+ if (tcp_ca_no_fallback_rfc3168(sk))
+ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
+ else
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
break;
- default:
- tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
- tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
- if (tp->rx_opt.accecn &&
- tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
- u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
-
- tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
- tp->accecn_opt_demand = 2;
+ case 0x5:
+ if (tcp_ecn_mode_pending(tp)) {
+ tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
+ if (INET_ECN_is_ce(ip_dsfield)) {
+ tp->received_ce++;
+ tp->received_ce_pending++;
+ }
}
+ break;
+ default:
+ tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
if (INET_ECN_is_ce(ip_dsfield) &&
tcp_accecn_validate_syn_feedback(sk, ace,
tp->syn_ect_snt)) {
@@ -525,9 +544,11 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb
}
}
-static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
+static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th,
const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_ecn_mode_pending(tp)) {
if (!tcp_accecn_syn_requested(th)) {
/* Downgrade to classic ECN feedback */
@@ -539,7 +560,8 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
}
}
- if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
+ if (tcp_ecn_mode_rfc3168(tp) &&
+ (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk)))
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
}
@@ -561,7 +583,7 @@ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk) ||
tcp_bpf_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
@@ -579,7 +601,8 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
bool use_ecn, use_accecn;
u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
- use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
+ use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN ||
+ tcp_ca_needs_accecn(sk);
use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
@@ -595,7 +618,7 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
if (use_ecn) {
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
if (use_accecn) {
@@ -619,12 +642,22 @@ static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
}
static inline void
-tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
+ enum tcp_synack_type synack_type)
{
- if (tcp_rsk(req)->accecn_ok)
- tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
- else if (inet_rsk(req)->ecn_ok)
- th->ece = 1;
+ /* Accurate ECN shall retransmit SYN/ACK with ACE=0 if the
+ * previously retransmitted SYN/ACK also times out.
+ */
+ if (!req->num_timeout || synack_type != TCP_SYNACK_RETRANS) {
+ if (tcp_rsk(req)->accecn_ok)
+ tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
+ else if (inet_rsk(req)->ecn_ok)
+ th->ece = 1;
+ } else if (tcp_rsk(req)->accecn_ok) {
+ th->ae = 0;
+ th->cwr = 0;
+ th->ece = 0;
+ }
}
static inline bool tcp_accecn_option_beacon_check(const struct sock *sk)
diff --git a/include/net/tls.h b/include/net/tls.h
index c7bcdb3afad7..ebd2550280ae 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -53,6 +53,8 @@ struct tls_rec;
/* Maximum data size carried in a TLS record */
#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14)
+/* Minimum record size limit as per RFC8449 */
+#define TLS_MIN_RECORD_SIZE_LIM ((size_t)1 << 6)
#define TLS_HEADER_SIZE 5
#define TLS_NONCE_OFFSET TLS_HEADER_SIZE
@@ -226,6 +228,7 @@ struct tls_context {
u8 rx_conf:3;
u8 zerocopy_sendfile:1;
u8 rx_no_pad:1;
+ u16 tx_max_payload_len;
int (*push_pending_record)(struct sock *sk, int flags);
void (*sk_write_space)(struct sock *sk);
diff --git a/include/net/udp.h b/include/net/udp.h
index cffedb3e40f2..da68702ddf6e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -294,8 +294,7 @@ static inline int udp_lib_init_sock(struct sock *sk)
up->forward_threshold = sk->sk_rcvbuf >> 2;
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
- up->udp_prod_queue = kcalloc(nr_node_ids, sizeof(*up->udp_prod_queue),
- GFP_KERNEL);
+	up->udp_prod_queue = kzalloc_objs(*up->udp_prod_queue, nr_node_ids,
+					  GFP_KERNEL);
if (!up->udp_prod_queue)
return -ENOMEM;
for (int i = 0; i < nr_node_ids; i++)
@@ -424,7 +423,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, int *karg);
int udp_init_sock(struct sock *sk);
-int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
@@ -527,18 +526,18 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
* SNMP statistics for UDP and UDP-Lite
*/
#define UDP_INC_STATS(net, field, is_udplite) do { \
- if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
+ if (unlikely(is_udplite)) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0)
#define __UDP_INC_STATS(net, field, is_udplite) do { \
- if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
+ if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \
else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0)
#define __UDP6_INC_STATS(net, field, is_udplite) do { \
- if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\
+ if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \
else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \
} while(0)
#define UDP6_INC_STATS(net, field, __lite) do { \
- if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \
+ if (unlikely(__lite)) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \
else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \
} while(0)
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 9acef2fbd2fd..fc1fc43345b5 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -10,6 +10,11 @@
#include <net/ipv6_stubs.h>
#endif
+#define UDP_TUNNEL_PARTIAL_FEATURES NETIF_F_GSO_ENCAP_ALL
+#define UDP_TUNNEL_STRIPPED_GSO_TYPES ((UDP_TUNNEL_PARTIAL_FEATURES | \
+ NETIF_F_GSO_PARTIAL) >> \
+ NETIF_F_GSO_SHIFT)
+
struct udp_port_cfg {
u8 family;
@@ -47,7 +52,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
{
- return 0;
+ return -EPFNOSUPPORT;
}
#endif
@@ -145,6 +150,33 @@ void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
__be16 src_port, __be16 dst_port, bool nocheck,
u16 ip6cb_flags);
+static inline bool udp_tunnel_handle_partial(struct sk_buff *skb)
+{
+ bool double_encap = !!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL);
+
+ /*
+ * If the skb went through partial segmentation, lower devices
+ * will not need to offload the related features - except for
+	 * UDP_TUNNEL, which will be re-added later by
+	 * udp_tunnel_handle_offloads().
+ */
+ if (double_encap)
+ skb_shinfo(skb)->gso_type &= ~UDP_TUNNEL_STRIPPED_GSO_TYPES;
+ return double_encap;
+}
+
+static inline void udp_tunnel_set_inner_protocol(struct sk_buff *skb,
+ bool double_encap,
+ __be16 inner_proto)
+{
+ /*
+	 * The inner protocol has been set by the nested tunnel, don't
+	 * override it.
+ */
+ if (!double_encap)
+ skb_set_inner_protocol(skb, inner_proto);
+}
+
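A hedged sketch of a tunnel transmit path wiring the two helpers together; the function is hypothetical, and a real user would still call udp_tunnel_handle_offloads() afterwards:

	static void toy_tunnel_xmit_prep(struct sk_buff *skb, __be16 inner_proto)
	{
		bool double_encap = udp_tunnel_handle_partial(skb);

		/* On double encapsulation, keep the nested tunnel's inner
		 * protocol; otherwise record ours.
		 */
		udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto);
	}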
void udp_tunnel_sock_release(struct socket *sock);
struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
diff --git a/include/net/vsock_addr.h b/include/net/vsock_addr.h
index cf8cc140d68d..c3f4cc206198 100644
--- a/include/net/vsock_addr.h
+++ b/include/net/vsock_addr.h
@@ -16,7 +16,7 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr);
void vsock_addr_unbind(struct sockaddr_vm *addr);
bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
const struct sockaddr_vm *other);
-int vsock_addr_cast(const struct sockaddr *addr, size_t len,
+int vsock_addr_cast(const struct sockaddr_unsized *addr, size_t len,
struct sockaddr_vm **out_addr);
#endif
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index ce587a225661..23e8861e8b25 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -125,6 +125,7 @@ struct xsk_tx_metadata_ops {
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(struct list_head *flush_list);
+INDIRECT_CALLABLE_DECLARE(void xsk_destruct_skb(struct sk_buff *));
/**
* xsk_tx_metadata_to_compl - Save enough relevant metadata information
@@ -218,6 +219,12 @@ static inline void __xsk_map_flush(struct list_head *flush_list)
{
}
+#ifdef CONFIG_MITIGATION_RETPOLINE
+static inline void xsk_destruct_skb(struct sk_buff *skb)
+{
+}
+#endif
+
static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
struct xsk_tx_metadata_compl *compl)
{
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 4f2d3268a676..6b9ebae2dc95 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -12,6 +12,10 @@
#define XDP_UMEM_MIN_CHUNK_SHIFT 11
#define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT)
+#define NETDEV_XDP_ACT_XSK (NETDEV_XDP_ACT_BASIC | \
+ NETDEV_XDP_ACT_REDIRECT | \
+ NETDEV_XDP_ACT_XSK_ZEROCOPY)
+
struct xsk_cb_desc {
void *src;
u8 off;
@@ -47,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool);
}
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+ return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool);
+}
+
static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
@@ -118,7 +127,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
goto out;
list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
- list_del(&pos->list_node);
+ list_del_init(&pos->list_node);
xp_free(pos);
}
@@ -153,7 +162,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
frag = list_first_entry_or_null(&xskb->pool->xskb_list,
struct xdp_buff_xsk, list_node);
if (frag) {
- list_del(&frag->list_node);
+ list_del_init(&frag->list_node);
ret = &frag->xdp;
}
@@ -164,7 +173,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
- list_del(&xskb->list_node);
+ list_del_init(&xskb->list_node);
}
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
@@ -333,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool)
return 0;
}
+static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool)
+{
+ return 0;
+}
+
static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
struct xdp_rxq_info *rxq)
{
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0a14daaa5dd4..10d3edde6b2f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1156,19 +1156,19 @@ struct xfrm_offload {
#define CRYPTO_INVALID_PROTOCOL 128
/* Used to keep whole l2 header for transport mode GRO */
- __u32 orig_mac_len;
+ __u16 orig_mac_len;
__u8 proto;
__u8 inner_ipproto;
};
struct sec_path {
- int len;
- int olen;
- int verified_cnt;
-
struct xfrm_state *xvec[XFRM_MAX_DEPTH];
struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH];
+
+ u8 len;
+ u8 olen;
+ u8 verified_cnt;
};
struct sec_path *secpath_set(struct sk_buff *skb);
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index cac56e6b0869..0b1abdb99c9e 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -85,11 +85,11 @@ struct xsk_buff_pool {
bool unaligned;
bool tx_sw_csum;
void *addrs;
- /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
- * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
- * sockets share a single cq when the same netdev and queue id is shared.
+ /* Mutual exclusion of the completion ring in the SKB mode.
+	 * Protects the NAPI TX thread and sendmsg error paths in the
+	 * SKB destructor callback.
*/
- spinlock_t cq_lock;
+ spinlock_t cq_prod_lock;
struct xdp_buff_xsk *free_heads[];
};