diff options
Diffstat (limited to 'include/net')
98 files changed, 2445 insertions, 850 deletions
diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 4f785098c67a..838a94218b59 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -16,6 +16,12 @@ /* Number of requests per row */ #define P9_ROW_MAXTAG 255 +/* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ + + * room for write (16 extra) or read (11 extra) operands. + */ + +#define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ) + /** enum p9_proto_versions - 9P protocol versions * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u * @p9_proto_2000u: 9P2000.u extension @@ -127,6 +133,96 @@ struct p9_client { }; /** + * struct p9_fd_opts - holds client options during parsing + * @msize: maximum data size negotiated by protocol + * @prot-Oversion: 9P protocol version to use + * @trans_mod: module API instantiated with this client + * + * These parsed options get transferred into client in + * apply_client_options() + */ +struct p9_client_opts { + unsigned int msize; + unsigned char proto_version; + struct p9_trans_module *trans_mod; +}; + +/** + * struct p9_fd_opts - per-transport options for fd transport + * @rfd: file descriptor for reading (trans=fd) + * @wfd: file descriptor for writing (trans=fd) + * @port: port to connect to (trans=tcp) + * @privport: port is privileged + */ +struct p9_fd_opts { + int rfd; + int wfd; + u16 port; + bool privport; +}; + +/** + * struct p9_rdma_opts - Collection of mount options for rdma transport + * @port: port of connection + * @privport: Whether a privileged port may be used + * @sq_depth: The requested depth of the SQ. This really doesn't need + * to be any deeper than the number of threads used in the client + * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth + * @timeout: Time to wait in msecs for CM events + */ +struct p9_rdma_opts { + short port; + bool privport; + int sq_depth; + int rq_depth; + long timeout; +}; + +/** + * struct p9_session_opts - holds parsed options for v9fs_session_info + * @flags: session options of type &p9_session_flags + * @nodev: set to 1 to disable device mapping + * @debug: debug level + * @afid: authentication handle + * @cache: cache mode of type &p9_cache_bits + * @cachetag: the tag of the cache associated with this session + * @uname: string user name to mount hierarchy as + * @aname: mount specifier for remote hierarchy + * @dfltuid: default numeric userid to mount hierarchy as + * @dfltgid: default numeric groupid to mount hierarchy as + * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy + * @session_lock_timeout: retry interval for blocking locks + * + * This strucure holds options which are parsed and will be transferred + * to the v9fs_session_info structure when mounted, and therefore largely + * duplicates struct v9fs_session_info. + */ +struct p9_session_opts { + unsigned int flags; + unsigned char nodev; + unsigned short debug; + unsigned int afid; + unsigned int cache; +#ifdef CONFIG_9P_FSCACHE + char *cachetag; +#endif + char *uname; + char *aname; + kuid_t dfltuid; + kgid_t dfltgid; + kuid_t uid; + long session_lock_timeout; +}; + +/* Used by mount API to store parsed mount options */ +struct v9fs_context { + struct p9_client_opts client_opts; + struct p9_fd_opts fd_opts; + struct p9_rdma_opts rdma_opts; + struct p9_session_opts session_opts; +}; + +/** * struct p9_fid - file system entity handle * @clnt: back pointer to instantiating &p9_client * @fid: numeric identifier for this handle @@ -183,7 +279,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, const char *name); int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, struct p9_fid *newdirfid, const char *new_name); -struct p9_client *p9_client_create(const char *dev_name, char *options); +struct p9_client *p9_client_create(struct fs_context *fc); void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 766ec07c9599..a912bbaa862f 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -14,6 +14,13 @@ #define P9_DEF_MIN_RESVPORT (665U) #define P9_DEF_MAX_RESVPORT (1023U) +#define P9_FD_PORT 564 + +#define P9_RDMA_PORT 5640 +#define P9_RDMA_SQ_DEPTH 32 +#define P9_RDMA_RQ_DEPTH 32 +#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ + /** * struct p9_trans_module - transport module interface * @list: used to maintain a list of currently available transports @@ -24,6 +31,9 @@ * we're less flexible when choosing the response message * size in this case * @def: set if this transport should be considered the default + * @supports_vmalloc: set if this transport can work with vmalloc'd buffers + * (non-physically contiguous memory). Transports requiring + * DMA should leave this as false. * @create: member function to create a new connection on this transport * @close: member function to discard a connection on this transport * @request: member function to issue a request to the transport @@ -43,10 +53,11 @@ struct p9_trans_module { char *name; /* name of transport */ int maxsize; /* max message size of transport */ bool pooled_rbuffers; - int def; /* this transport should be default */ + bool def; /* this transport should be default */ + bool supports_vmalloc; /* can work with vmalloc'd buffers */ struct module *owner; int (*create)(struct p9_client *client, - const char *devname, char *args); + struct fs_context *fc); void (*close)(struct p9_client *client); int (*request)(struct p9_client *client, struct p9_req_t *req); int (*cancel)(struct p9_client *client, struct p9_req_t *req); diff --git a/include/net/act_api.h b/include/net/act_api.h index 91a24b5e0b93..d11b79107930 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -70,6 +70,7 @@ struct tc_action { #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) #define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) +#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5)) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. @@ -159,7 +160,7 @@ int tc_action_net_init(struct net *net, struct tc_action_net *tn, { int err = 0; - tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL); + tn->idrinfo = kmalloc_obj(*tn->idrinfo); if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9e5e95988b9e..9e96776945e5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -8,7 +8,8 @@ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ -#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */ +/* TEMP_VALID_LIFETIME default value as specified in RFC 8981 3.8 */ +#define TEMP_VALID_LIFETIME (2*86400) /* 2 days */ #define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */ #define REGEN_MIN_ADVANCE (2) /* 2 seconds */ #define REGEN_MAX_RETRY (3) @@ -347,6 +348,11 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) return rcu_dereference_rtnl(dev->ip6_ptr); } +static inline struct inet6_dev *in6_dev_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->ip6_ptr); +} + static inline struct inet6_dev *__in6_dev_get_rtnl_net(const struct net_device *dev) { return rtnl_net_dereference(dev_net(dev), dev->ip6_ptr); diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d40e978126e3..533d8e75f7bb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/workqueue.h> +#include <net/netns/vsock.h> #include <net/sock.h> #include <uapi/linux/vm_sockets.h> @@ -124,7 +125,7 @@ struct vsock_transport { size_t len, int flags); int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, struct msghdr *, size_t len); - bool (*dgram_allow)(u32 cid, u32 port); + bool (*dgram_allow)(struct vsock_sock *vsk, u32 cid, u32 port); /* STREAM. */ /* TODO: stream_bind() */ @@ -136,14 +137,14 @@ struct vsock_transport { s64 (*stream_has_space)(struct vsock_sock *); u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); - bool (*stream_allow)(u32 cid, u32 port); + bool (*stream_allow)(struct vsock_sock *vsk, u32 cid, u32 port); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, int flags); int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg, size_t len); - bool (*seqpacket_allow)(u32 remote_cid); + bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid); u32 (*seqpacket_has_data)(struct vsock_sock *vsk); /* Notification. */ @@ -216,6 +217,11 @@ void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); +struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr, + struct net *net); +struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + struct net *net); void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(struct vsock_transport *transport, void (*fn)(struct sock *sk)); @@ -256,4 +262,62 @@ static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) { return t->msgzerocopy_allow && t->msgzerocopy_allow(); } + +static inline enum vsock_net_mode vsock_net_mode(struct net *net) +{ + if (!net) + return VSOCK_NET_MODE_GLOBAL; + + return READ_ONCE(net->vsock.mode); +} + +static inline bool vsock_net_mode_global(struct vsock_sock *vsk) +{ + return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL; +} + +static inline bool vsock_net_set_child_mode(struct net *net, + enum vsock_net_mode mode) +{ + int new_locked = mode + 1; + int old_locked = 0; /* unlocked */ + + if (try_cmpxchg(&net->vsock.child_ns_mode_locked, + &old_locked, new_locked)) { + WRITE_ONCE(net->vsock.child_ns_mode, mode); + return true; + } + + return old_locked == new_locked; +} + +static inline enum vsock_net_mode vsock_net_child_mode(struct net *net) +{ + return READ_ONCE(net->vsock.child_ns_mode); +} + +/* Return true if two namespaces pass the mode rules. Otherwise, return false. + * + * A NULL namespace is treated as VSOCK_NET_MODE_GLOBAL. + * + * Read more about modes in the comment header of net/vmw_vsock/af_vsock.c. + */ +static inline bool vsock_net_check_mode(struct net *ns0, struct net *ns1) +{ + enum vsock_net_mode mode0, mode1; + + /* Any vsocks within the same network namespace are always reachable, + * regardless of the mode. + */ + if (net_eq(ns0, ns1)) + return true; + + mode0 = vsock_net_mode(ns0); + mode1 = vsock_net_mode(ns1); + + /* Different namespaces are only reachable if they are both + * global mode. + */ + return mode0 == VSOCK_NET_MODE_GLOBAL && mode0 == mode1; +} #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/ax25.h b/include/net/ax25.h index a7bba42dde15..9fc6a6657266 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -116,10 +116,6 @@ enum { AX25_PROTO_STD_DUPLEX, #ifdef CONFIG_AX25_DAMA_SLAVE AX25_PROTO_DAMA_SLAVE, -#ifdef CONFIG_AX25_DAMA_MASTER - AX25_PROTO_DAMA_MASTER, -#define AX25_PROTO_MAX AX25_PROTO_DAMA_MASTER -#endif #endif __AX25_PROTO_MAX, AX25_PROTO_MAX = __AX25_PROTO_MAX -1 @@ -138,7 +134,7 @@ enum { AX25_VALUES_IDLE, /* Connected mode idle timer */ AX25_VALUES_N2, /* Default N2 value */ AX25_VALUES_PACLEN, /* AX.25 MTU */ - AX25_VALUES_PROTOCOL, /* Std AX.25, DAMA Slave, DAMA Master */ + AX25_VALUES_PROTOCOL, /* Std AX.25, DAMA Slave */ #ifdef CONFIG_AX25_DAMA_SLAVE AX25_VALUES_DS_TIMEOUT, /* DAMA Slave timeout */ #endif @@ -215,8 +211,6 @@ typedef struct { unsigned short slave_timeout; /* when? */ } ax25_dama_info; -struct ctl_table; - typedef struct ax25_dev { struct list_head list; @@ -226,7 +220,7 @@ typedef struct ax25_dev { struct net_device *forward; struct ctl_table_header *sysheader; int values[AX25_MAX_VALUES]; -#if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) +#ifdef CONFIG_AX25_DAMA_SLAVE ax25_dama_info dama; #endif refcount_t refcount; diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d46ed9011ee5..69eed69f7f26 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -130,21 +130,30 @@ struct bt_voice { #define BT_RCVMTU 13 #define BT_PHY 14 -#define BT_PHY_BR_1M_1SLOT 0x00000001 -#define BT_PHY_BR_1M_3SLOT 0x00000002 -#define BT_PHY_BR_1M_5SLOT 0x00000004 -#define BT_PHY_EDR_2M_1SLOT 0x00000008 -#define BT_PHY_EDR_2M_3SLOT 0x00000010 -#define BT_PHY_EDR_2M_5SLOT 0x00000020 -#define BT_PHY_EDR_3M_1SLOT 0x00000040 -#define BT_PHY_EDR_3M_3SLOT 0x00000080 -#define BT_PHY_EDR_3M_5SLOT 0x00000100 -#define BT_PHY_LE_1M_TX 0x00000200 -#define BT_PHY_LE_1M_RX 0x00000400 -#define BT_PHY_LE_2M_TX 0x00000800 -#define BT_PHY_LE_2M_RX 0x00001000 -#define BT_PHY_LE_CODED_TX 0x00002000 -#define BT_PHY_LE_CODED_RX 0x00004000 +#define BT_PHY_BR_1M_1SLOT BIT(0) +#define BT_PHY_BR_1M_3SLOT BIT(1) +#define BT_PHY_BR_1M_5SLOT BIT(2) +#define BT_PHY_EDR_2M_1SLOT BIT(3) +#define BT_PHY_EDR_2M_3SLOT BIT(4) +#define BT_PHY_EDR_2M_5SLOT BIT(5) +#define BT_PHY_EDR_3M_1SLOT BIT(6) +#define BT_PHY_EDR_3M_3SLOT BIT(7) +#define BT_PHY_EDR_3M_5SLOT BIT(8) +#define BT_PHY_LE_1M_TX BIT(9) +#define BT_PHY_LE_1M_RX BIT(10) +#define BT_PHY_LE_2M_TX BIT(11) +#define BT_PHY_LE_2M_RX BIT(12) +#define BT_PHY_LE_CODED_TX BIT(13) +#define BT_PHY_LE_CODED_RX BIT(14) + +#define BT_PHY_BREDR_MASK (BT_PHY_BR_1M_1SLOT | BT_PHY_BR_1M_3SLOT | \ + BT_PHY_BR_1M_5SLOT | BT_PHY_EDR_2M_1SLOT | \ + BT_PHY_EDR_2M_3SLOT | BT_PHY_EDR_2M_5SLOT | \ + BT_PHY_EDR_3M_1SLOT | BT_PHY_EDR_3M_3SLOT | \ + BT_PHY_EDR_3M_5SLOT) +#define BT_PHY_LE_MASK (BT_PHY_LE_1M_TX | BT_PHY_LE_1M_RX | \ + BT_PHY_LE_2M_TX | BT_PHY_LE_2M_RX | \ + BT_PHY_LE_CODED_TX | BT_PHY_LE_CODED_RX) #define BT_MODE 15 @@ -173,7 +182,7 @@ struct bt_iso_io_qos { __u32 interval; __u16 latency; __u16 sdu; - __u8 phy; + __u8 phys; __u8 rtn; }; @@ -212,9 +221,9 @@ struct bt_iso_qos { }; }; -#define BT_ISO_PHY_1M 0x01 -#define BT_ISO_PHY_2M 0x02 -#define BT_ISO_PHY_CODED 0x04 +#define BT_ISO_PHY_1M BIT(0) +#define BT_ISO_PHY_2M BIT(1) +#define BT_ISO_PHY_CODED BIT(2) #define BT_ISO_PHY_ANY (BT_ISO_PHY_1M | BT_ISO_PHY_2M | \ BT_ISO_PHY_CODED) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cb4c02d00759..89ad9470fa71 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -647,10 +647,15 @@ enum { #define HCI_LE_EXT_ADV 0x10 #define HCI_LE_PERIODIC_ADV 0x20 #define HCI_LE_CHAN_SEL_ALG2 0x40 +#define HCI_LE_PAST_SENDER 0x01 +#define HCI_LE_PAST_RECEIVER 0x02 #define HCI_LE_CIS_CENTRAL 0x10 #define HCI_LE_CIS_PERIPHERAL 0x20 #define HCI_LE_ISO_BROADCASTER 0x40 #define HCI_LE_ISO_SYNC_RECEIVER 0x80 +#define HCI_LE_LL_EXT_FEATURE 0x80 +#define HCI_LE_CS 0x40 +#define HCI_LE_CS_HOST 0x80 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -1880,6 +1885,15 @@ struct hci_cp_le_set_default_phy { #define HCI_LE_SET_PHY_2M 0x02 #define HCI_LE_SET_PHY_CODED 0x04 +#define HCI_OP_LE_SET_PHY 0x2032 +struct hci_cp_le_set_phy { + __le16 handle; + __u8 all_phys; + __u8 tx_phys; + __u8 rx_phys; + __le16 phy_opts; +} __packed; + #define HCI_OP_LE_SET_EXT_SCAN_PARAMS 0x2041 struct hci_cp_le_set_ext_scan_params { __u8 own_addr_type; @@ -2068,6 +2082,44 @@ struct hci_cp_le_set_privacy_mode { __u8 mode; } __packed; +#define HCI_OP_LE_PAST 0x205a +struct hci_cp_le_past { + __le16 handle; + __le16 service_data; + __le16 sync_handle; +} __packed; + +struct hci_rp_le_past { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_PAST_SET_INFO 0x205b +struct hci_cp_le_past_set_info { + __le16 handle; + __le16 service_data; + __u8 adv_handle; +} __packed; + +struct hci_rp_le_past_set_info { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_PAST_PARAMS 0x205c +struct hci_cp_le_past_params { + __le16 handle; + __u8 mode; + __le16 skip; + __le16 sync_timeout; + __u8 cte_type; +} __packed; + +struct hci_rp_le_past_params { + __u8 status; + __le16 handle; +} __packed; + #define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060 struct hci_rp_le_read_buffer_size_v2 { __u8 status; @@ -2095,8 +2147,8 @@ struct hci_cis_params { __u8 cis_id; __le16 c_sdu; __le16 p_sdu; - __u8 c_phy; - __u8 p_phy; + __u8 c_phys; + __u8 p_phys; __u8 c_rtn; __u8 p_rtn; } __packed; @@ -2215,6 +2267,217 @@ struct hci_cp_le_set_host_feature { __u8 bit_value; } __packed; +#define HCI_OP_LE_READ_ALL_LOCAL_FEATURES 0x2087 +struct hci_rp_le_read_all_local_features { + __u8 status; + __u8 page; + __u8 features[248]; +} __packed; + +#define HCI_OP_LE_READ_ALL_REMOTE_FEATURES 0x2088 +struct hci_cp_le_read_all_remote_features { + __le16 handle; + __u8 pages; +} __packed; + +/* Channel Sounding Commands */ +#define HCI_OP_LE_CS_RD_LOCAL_SUPP_CAP 0x2089 +struct hci_rp_le_cs_rd_local_supp_cap { + __u8 status; + __u8 num_config_supported; + __le16 max_consecutive_procedures_supported; + __u8 num_antennas_supported; + __u8 max_antenna_paths_supported; + __u8 roles_supported; + __u8 modes_supported; + __u8 rtt_capability; + __u8 rtt_aa_only_n; + __u8 rtt_sounding_n; + __u8 rtt_random_payload_n; + __le16 nadm_sounding_capability; + __le16 nadm_random_capability; + __u8 cs_sync_phys_supported; + __le16 subfeatures_supported; + __le16 t_ip1_times_supported; + __le16 t_ip2_times_supported; + __le16 t_fcs_times_supported; + __le16 t_pm_times_supported; + __u8 t_sw_time_supported; + __u8 tx_snr_capability; +} __packed; + +#define HCI_OP_LE_CS_RD_RMT_SUPP_CAP 0x208A +struct hci_cp_le_cs_rd_local_supp_cap { + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_WR_CACHED_RMT_SUPP_CAP 0x208B +struct hci_cp_le_cs_wr_cached_rmt_supp_cap { + __le16 handle; + __u8 num_config_supported; + __le16 max_consecutive_procedures_supported; + __u8 num_antennas_supported; + __u8 max_antenna_paths_supported; + __u8 roles_supported; + __u8 modes_supported; + __u8 rtt_capability; + __u8 rtt_aa_only_n; + __u8 rtt_sounding_n; + __u8 rtt_random_payload_n; + __le16 nadm_sounding_capability; + __le16 nadm_random_capability; + __u8 cs_sync_phys_supported; + __le16 subfeatures_supported; + __le16 t_ip1_times_supported; + __le16 t_ip2_times_supported; + __le16 t_fcs_times_supported; + __le16 t_pm_times_supported; + __u8 t_sw_time_supported; + __u8 tx_snr_capability; +} __packed; + +struct hci_rp_le_cs_wr_cached_rmt_supp_cap { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_SEC_ENABLE 0x208C +struct hci_cp_le_cs_sec_enable { + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_SET_DEFAULT_SETTINGS 0x208D +struct hci_cp_le_cs_set_default_settings { + __le16 handle; + __u8 role_enable; + __u8 cs_sync_ant_sel; + __s8 max_tx_power; +} __packed; + +struct hci_rp_le_cs_set_default_settings { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_RD_RMT_FAE_TABLE 0x208E +struct hci_cp_le_cs_rd_rmt_fae_table { + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_WR_CACHED_RMT_FAE_TABLE 0x208F +struct hci_cp_le_cs_wr_rmt_cached_fae_table { + __le16 handle; + __u8 remote_fae_table[72]; +} __packed; + +struct hci_rp_le_cs_wr_rmt_cached_fae_table { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_CREATE_CONFIG 0x2090 +struct hci_cp_le_cs_create_config { + __le16 handle; + __u8 config_id; + __u8 create_context; + __u8 main_mode_type; + __u8 sub_mode_type; + __u8 min_main_mode_steps; + __u8 max_main_mode_steps; + __u8 main_mode_repetition; + __u8 mode_0_steps; + __u8 role; + __u8 rtt_type; + __u8 cs_sync_phy; + __u8 channel_map[10]; + __u8 channel_map_repetition; + __u8 channel_selection_type; + __u8 ch3c_shape; + __u8 ch3c_jump; + __u8 reserved; +} __packed; + +#define HCI_OP_LE_CS_REMOVE_CONFIG 0x2091 +struct hci_cp_le_cs_remove_config { + __le16 handle; + __u8 config_id; +} __packed; + +#define HCI_OP_LE_CS_SET_CH_CLASSIFICATION 0x2092 +struct hci_cp_le_cs_set_ch_classification { + __u8 ch_classification[10]; +} __packed; + +struct hci_rp_le_cs_set_ch_classification { + __u8 status; +} __packed; + +#define HCI_OP_LE_CS_SET_PROC_PARAM 0x2093 +struct hci_cp_le_cs_set_proc_param { + __le16 handle; + __u8 config_id; + __le16 max_procedure_len; + __le16 min_procedure_interval; + __le16 max_procedure_interval; + __le16 max_procedure_count; + __u8 min_subevent_len[3]; + __u8 max_subevent_len[3]; + __u8 tone_antenna_config_selection; + __u8 phy; + __u8 tx_power_delta; + __u8 preferred_peer_antenna; + __u8 snr_control_initiator; + __u8 snr_control_reflector; +} __packed; + +struct hci_rp_le_cs_set_proc_param { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_CS_SET_PROC_ENABLE 0x2094 +struct hci_cp_le_cs_set_proc_enable { + __le16 handle; + __u8 config_id; + __u8 enable; +} __packed; + +#define HCI_OP_LE_CS_TEST 0x2095 +struct hci_cp_le_cs_test { + __u8 main_mode_type; + __u8 sub_mode_type; + __u8 main_mode_repetition; + __u8 mode_0_steps; + __u8 role; + __u8 rtt_type; + __u8 cs_sync_phy; + __u8 cs_sync_antenna_selection; + __u8 subevent_len[3]; + __le16 subevent_interval; + __u8 max_num_subevents; + __u8 transmit_power_level; + __u8 t_ip1_time; + __u8 t_ip2_time; + __u8 t_fcs_time; + __u8 t_pm_time; + __u8 t_sw_time; + __u8 tone_antenna_config_selection; + __u8 reserved; + __u8 snr_control_initiator; + __u8 snr_control_reflector; + __le16 drbg_nonce; + __u8 channel_map_repetition; + __le16 override_config; + __u8 override_parameters_length; + __u8 override_parameters_data[]; +} __packed; + +struct hci_rp_le_cs_test { + __u8 status; +} __packed; + +#define HCI_OP_LE_CS_TEST_END 0x2096 + /* ---- HCI Events ---- */ struct hci_ev_status { __u8 status; @@ -2800,6 +3063,20 @@ struct hci_evt_le_ext_adv_set_term { __u8 num_evts; } __packed; +#define HCI_EV_LE_PAST_RECEIVED 0x18 +struct hci_ev_le_past_received { + __u8 status; + __le16 handle; + __le16 service_data; + __le16 sync_handle; + __u8 sid; + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 phy; + __le16 interval; + __u8 clock_accuracy; +} __packed; + #define HCI_EVT_LE_CIS_ESTABLISHED 0x19 struct hci_evt_le_cis_established { __u8 status; @@ -2883,6 +3160,138 @@ struct hci_evt_le_big_info_adv_report { __u8 encryption; } __packed; +#define HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE 0x2b +struct hci_evt_le_read_all_remote_features_complete { + __u8 status; + __le16 handle; + __u8 max_pages; + __u8 valid_pages; + __u8 features[248]; +} __packed; + +/* Channel Sounding Events */ +#define HCI_EVT_LE_CS_READ_RMT_SUPP_CAP_COMPLETE 0x2C +struct hci_evt_le_cs_read_rmt_supp_cap_complete { + __u8 status; + __le16 handle; + __u8 num_configs_supp; + __le16 max_consec_proc_supp; + __u8 num_ant_supp; + __u8 max_ant_path_supp; + __u8 roles_supp; + __u8 modes_supp; + __u8 rtt_cap; + __u8 rtt_aa_only_n; + __u8 rtt_sounding_n; + __u8 rtt_rand_payload_n; + __le16 nadm_sounding_cap; + __le16 nadm_rand_cap; + __u8 cs_sync_phys_supp; + __le16 sub_feat_supp; + __le16 t_ip1_times_supp; + __le16 t_ip2_times_supp; + __le16 t_fcs_times_supp; + __le16 t_pm_times_supp; + __u8 t_sw_times_supp; + __u8 tx_snr_cap; +} __packed; + +#define HCI_EVT_LE_CS_READ_RMT_FAE_TABLE_COMPLETE 0x2D +struct hci_evt_le_cs_read_rmt_fae_table_complete { + __u8 status; + __le16 handle; + __u8 remote_fae_table[72]; +} __packed; + +#define HCI_EVT_LE_CS_SECURITY_ENABLE_COMPLETE 0x2E +struct hci_evt_le_cs_security_enable_complete { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_EVT_LE_CS_CONFIG_COMPLETE 0x2F +struct hci_evt_le_cs_config_complete { + __u8 status; + __le16 handle; + __u8 config_id; + __u8 action; + __u8 main_mode_type; + __u8 sub_mode_type; + __u8 min_main_mode_steps; + __u8 max_main_mode_steps; + __u8 main_mode_rep; + __u8 mode_0_steps; + __u8 role; + __u8 rtt_type; + __u8 cs_sync_phy; + __u8 channel_map[10]; + __u8 channel_map_rep; + __u8 channel_sel_type; + __u8 ch3c_shape; + __u8 ch3c_jump; + __u8 reserved; + __u8 t_ip1_time; + __u8 t_ip2_time; + __u8 t_fcs_time; + __u8 t_pm_time; +} __packed; + +#define HCI_EVT_LE_CS_PROCEDURE_ENABLE_COMPLETE 0x30 +struct hci_evt_le_cs_procedure_enable_complete { + __u8 status; + __le16 handle; + __u8 config_id; + __u8 state; + __u8 tone_ant_config_sel; + __s8 sel_tx_pwr; + __u8 sub_evt_len[3]; + __u8 sub_evts_per_evt; + __le16 sub_evt_intrvl; + __le16 evt_intrvl; + __le16 proc_intrvl; + __le16 proc_counter; + __le16 max_proc_len; +} __packed; + +#define HCI_EVT_LE_CS_SUBEVENT_RESULT 0x31 +struct hci_evt_le_cs_subevent_result { + __le16 handle; + __u8 config_id; + __le16 start_acl_conn_evt_counter; + __le16 proc_counter; + __le16 freq_comp; + __u8 ref_pwr_lvl; + __u8 proc_done_status; + __u8 subevt_done_status; + __u8 abort_reason; + __u8 num_ant_paths; + __u8 num_steps_reported; + __u8 step_mode[0]; /* depends on num_steps_reported */ + __u8 step_channel[0]; /* depends on num_steps_reported */ + __u8 step_data_length[0]; /* depends on num_steps_reported */ + __u8 step_data[0]; /* depends on num_steps_reported */ +} __packed; + +#define HCI_EVT_LE_CS_SUBEVENT_RESULT_CONTINUE 0x32 +struct hci_evt_le_cs_subevent_result_continue { + __le16 handle; + __u8 config_id; + __u8 proc_done_status; + __u8 subevt_done_status; + __u8 abort_reason; + __u8 num_ant_paths; + __u8 num_steps_reported; + __u8 step_mode[0]; /* depends on num_steps_reported */ + __u8 step_channel[0]; /* depends on num_steps_reported */ + __u8 step_data_length[0]; /* depends on num_steps_reported */ + __u8 step_data[0]; /* depends on num_steps_reported */ +} __packed; + +#define HCI_EVT_LE_CS_TEST_END_COMPLETE 0x33 +struct hci_evt_le_cs_test_end_complete { + __u8 status; +} __packed; + #define HCI_EV_VENDOR 0xff /* Internal events generated by Bluetooth stack */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0cb87687837f..a7bffb908c1e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -166,6 +166,7 @@ enum hci_conn_flags { HCI_CONN_FLAG_REMOTE_WAKEUP = BIT(0), HCI_CONN_FLAG_DEVICE_PRIVACY = BIT(1), HCI_CONN_FLAG_ADDRESS_RESOLUTION = BIT(2), + HCI_CONN_FLAG_PAST = BIT(3), }; typedef u8 hci_conn_flags_t; @@ -377,7 +378,7 @@ struct hci_dev { __u8 minor_class; __u8 max_page; __u8 features[HCI_MAX_PAGES][8]; - __u8 le_features[8]; + __u8 le_features[248]; __u8 le_accept_list_size; __u8 le_resolv_list_size; __u8 le_num_of_adv_sets; @@ -701,6 +702,7 @@ struct hci_conn { __u8 attempt; __u8 dev_class[3]; __u8 features[HCI_MAX_PAGES][8]; + __u8 le_features[248]; __u16 pkt_type; __u16 link_policy; __u8 key_type; @@ -728,6 +730,8 @@ struct hci_conn { __u16 le_per_adv_data_offset; __u8 le_adv_phy; __u8 le_adv_sec_phy; + __u8 le_tx_def_phys; + __u8 le_rx_def_phys; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; @@ -1570,9 +1574,9 @@ int hci_le_create_cis_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, - u8 role, u16 handle); + u8 dst_type, u8 role, u16 handle); struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, - bdaddr_t *dst, u8 role); + bdaddr_t *dst, u8 dst_type, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); @@ -1601,6 +1605,7 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base, u16 timeout); +int hci_past_bis(struct hci_conn *conn, bdaddr_t *dst, __u8 dst_type); struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos, u16 timeout); @@ -2053,6 +2058,26 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define sync_recv_capable(dev) \ ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) #define sync_recv_enabled(dev) (le_enabled(dev) && sync_recv_capable(dev)) +#define past_sender_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_PAST_SENDER) +#define past_receiver_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_PAST_RECEIVER) +#define past_capable(dev) \ + (past_sender_capable(dev) || past_receiver_capable(dev)) +#define past_sender_enabled(dev) \ + (le_enabled(dev) && past_sender_capable(dev)) +#define past_receiver_enabled(dev) \ + (le_enabled(dev) && past_receiver_capable(dev)) +#define past_enabled(dev) \ + (past_sender_enabled(dev) || past_receiver_enabled(dev)) +#define ll_ext_feature_capable(dev) \ + ((dev)->le_features[7] & HCI_LE_LL_EXT_FEATURE) + +/* Channel sounding support */ +#define le_cs_capable(dev) \ + ((dev)->le_features[5] & HCI_LE_CS) +#define le_cs_host_capable(dev) \ + ((dev)->le_features[5] & HCI_LE_CS_HOST) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) @@ -2317,6 +2342,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); void *hci_recv_event_data(struct hci_dev *hdev, __u8 event); u32 hci_conn_get_phy(struct hci_conn *conn); +int hci_conn_set_phy(struct hci_conn *conn, u32 phys); /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index e352a4e0ef8d..73e494b2591d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -188,3 +188,9 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_past_sync(struct hci_conn *conn, struct hci_conn *le); + +int hci_le_read_remote_features(struct hci_conn *conn); + +int hci_acl_change_pkt_type(struct hci_conn *conn, u16 pkt_type); +int hci_le_set_phy(struct hci_conn *conn, u8 tx_phys, u8 rx_phys); diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 00e182a22720..010f1a8fd15f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -284,9 +284,9 @@ struct l2cap_conn_rsp { #define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 -#define L2CAP_CR_LE_SCID_IN_USE 0X000A -#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B -#define L2CAP_CR_LE_INVALID_PARAMS 0X000C +#define L2CAP_CR_LE_SCID_IN_USE 0x000A +#define L2CAP_CR_LE_UNACCEPT_PARAMS 0x000B +#define L2CAP_CR_LE_INVALID_PARAMS 0x000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -493,6 +493,8 @@ struct l2cap_ecred_reconf_req { #define L2CAP_RECONF_SUCCESS 0x0000 #define L2CAP_RECONF_INVALID_MTU 0x0001 #define L2CAP_RECONF_INVALID_MPS 0x0002 +#define L2CAP_RECONF_INVALID_CID 0x0003 +#define L2CAP_RECONF_INVALID_PARAMS 0x0004 struct l2cap_ecred_reconf_rsp { __le16 result; @@ -655,8 +657,7 @@ struct l2cap_conn { struct sk_buff *rx_skb; __u32 rx_len; - __u8 tx_ident; - struct mutex ident_lock; + struct ida tx_ida; struct sk_buff_head pending_rx; struct work_struct pending_rx_work; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index f5be96f08b9d..8234915854b6 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -119,6 +119,8 @@ struct mgmt_rp_read_index_list { #define MGMT_SETTING_ISO_BROADCASTER BIT(20) #define MGMT_SETTING_ISO_SYNC_RECEIVER BIT(21) #define MGMT_SETTING_LL_PRIVACY BIT(22) +#define MGMT_SETTING_PAST_SENDER BIT(23) +#define MGMT_SETTING_PAST_RECEIVER BIT(24) #define MGMT_OP_READ_INFO 0x0004 #define MGMT_READ_INFO_SIZE 0 diff --git a/include/net/bonding.h b/include/net/bonding.h index 49edc7da0586..395c6e281c5f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -254,6 +254,7 @@ struct bonding { struct delayed_work ad_work; struct delayed_work mcast_work; struct delayed_work slave_arr_work; + struct delayed_work peer_notify_work; #ifdef CONFIG_DEBUG_FS /* debugging support via debugfs */ struct dentry *debug_dir; @@ -521,13 +522,14 @@ static inline int bond_is_ip6_target_ok(struct in6_addr *addr) static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond, struct slave *slave) { + unsigned long tmp, ret = READ_ONCE(slave->target_last_arp_rx[0]); int i = 1; - unsigned long ret = slave->target_last_arp_rx[0]; - - for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++) - if (time_before(slave->target_last_arp_rx[i], ret)) - ret = slave->target_last_arp_rx[i]; + for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++) { + tmp = READ_ONCE(slave->target_last_arp_rx[i]); + if (time_before(tmp, ret)) + ret = tmp; + } return ret; } @@ -537,7 +539,7 @@ static inline unsigned long slave_last_rx(struct bonding *bond, if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL) return slave_oldest_target_arp_rx(bond, slave); - return slave->last_rx; + return READ_ONCE(slave->last_rx); } static inline void slave_update_last_tx(struct slave *slave) @@ -697,6 +699,7 @@ void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int mode); +bool __bond_xdp_check(int mode, int xmit_policy); bool bond_xdp_check(struct bonding *bond, int mode); void bond_setup(struct net_device *bond_dev); unsigned int bond_get_num_tx_queues(void); @@ -709,6 +712,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, int level); int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); +void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay); void bond_work_init_all(struct bonding *bond); void bond_work_cancel_all(struct bonding *bond); diff --git a/include/net/can.h b/include/net/can.h new file mode 100644 index 000000000000..6db9e826f0e0 --- /dev/null +++ b/include/net/can.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * net/can.h + * + * Definitions for the CAN network socket buffer extensions + * + * Copyright (C) 2026 Oliver Hartkopp <socketcan@hartkopp.net> + * + */ + +#ifndef _NET_CAN_H +#define _NET_CAN_H + +/** + * struct can_skb_ext - skb extensions for CAN specific content + * @can_iif: ifindex of the first interface the CAN frame appeared on + * @can_framelen: cached echo CAN frame length for bql + * @can_gw_hops: can-gw CAN frame time-to-live counter + * @can_ext_flags: CAN skb extensions flags + */ +struct can_skb_ext { + int can_iif; + u16 can_framelen; + u8 can_gw_hops; + u8 can_ext_flags; +}; + +#endif /* _NET_CAN_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 820e299f06b5..fc01de19c798 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7,7 +7,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #include <linux/ethtool.h> @@ -126,6 +126,7 @@ struct wiphy; * @IEEE80211_CHAN_NO_4MHZ: 4 MHz bandwidth is not permitted on this channel. * @IEEE80211_CHAN_NO_8MHZ: 8 MHz bandwidth is not permitted on this channel. * @IEEE80211_CHAN_NO_16MHZ: 16 MHz bandwidth is not permitted on this channel. + * @IEEE80211_CHAN_NO_UHR: UHR operation is not permitted on this channel. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -143,6 +144,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_10MHZ = BIT(12), IEEE80211_CHAN_NO_HE = BIT(13), /* can use free bits here */ + IEEE80211_CHAN_NO_UHR = BIT(18), IEEE80211_CHAN_NO_320MHZ = BIT(19), IEEE80211_CHAN_NO_EHT = BIT(20), IEEE80211_CHAN_DFS_CONCURRENT = BIT(21), @@ -429,6 +431,18 @@ struct ieee80211_sta_eht_cap { u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; }; +/** + * struct ieee80211_sta_uhr_cap - STA's UHR capabilities + * @has_uhr: true iff UHR is supported and data is valid + * @mac: fixed MAC capabilities + * @phy: fixed PHY capabilities + */ +struct ieee80211_sta_uhr_cap { + bool has_uhr; + struct ieee80211_uhr_cap_mac mac; + struct ieee80211_uhr_cap_phy phy; +}; + /* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ /* @@ -454,6 +468,7 @@ struct ieee80211_sta_eht_cap { * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a * 6 GHz band channel (and 0 may be valid value). * @eht_cap: STA's EHT capabilities + * @uhr_cap: STA's UHR capabilities * @vendor_elems: vendor element(s) to advertise * @vendor_elems.data: vendor element(s) data * @vendor_elems.len: vendor element(s) length @@ -463,6 +478,7 @@ struct ieee80211_sband_iftype_data { struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; + struct ieee80211_sta_uhr_cap uhr_cap; struct { const u8 *data; unsigned int len; @@ -685,7 +701,7 @@ ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband, } /** - * ieee80211_get_eht_iftype_cap - return ETH capabilities for an sband's iftype + * ieee80211_get_eht_iftype_cap - return EHT capabilities for an sband's iftype * @sband: the sband to search for the iftype on * @iftype: enum nl80211_iftype * @@ -705,6 +721,26 @@ ieee80211_get_eht_iftype_cap(const struct ieee80211_supported_band *sband, } /** + * ieee80211_get_uhr_iftype_cap - return UHR capabilities for an sband's iftype + * @sband: the sband to search for the iftype on + * @iftype: enum nl80211_iftype + * + * Return: pointer to the struct ieee80211_sta_uhr_cap, or NULL is none found + */ +static inline const struct ieee80211_sta_uhr_cap * +ieee80211_get_uhr_iftype_cap(const struct ieee80211_supported_band *sband, + enum nl80211_iftype iftype) +{ + const struct ieee80211_sband_iftype_data *data = + ieee80211_get_sband_iftype_data(sband, iftype); + + if (data && data->uhr_cap.has_uhr) + return &data->uhr_cap; + + return NULL; +} + +/** * wiphy_read_of_freq_limits - read frequency limits from device tree * * @wiphy: the wireless device to get extra limits for @@ -786,8 +822,7 @@ struct vif_params { * @key: key material * @key_len: length of key material * @cipher: cipher suite selector - * @seq: sequence counter (IV/PN) for TKIP and CCMP keys, only used - * with the get_key() callback, must be in little endian, + * @seq: sequence counter (IV/PN), must be in little endian, * length given by @seq_len. * @seq_len: length of @seq. * @vlan_id: vlan_id for VLAN group key (if nonzero) @@ -975,7 +1010,8 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, chandef1->center_freq1 == chandef2->center_freq1 && chandef1->freq1_offset == chandef2->freq1_offset && chandef1->center_freq2 == chandef2->center_freq2 && - chandef1->punctured == chandef2->punctured); + chandef1->punctured == chandef2->punctured && + chandef1->s1g_primary_2mhz == chandef2->s1g_primary_2mhz); } /** @@ -1015,6 +1051,7 @@ const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2); + /** * nl80211_chan_width_to_mhz - get the channel width in MHz * @chan_width: the channel width from &enum nl80211_chan_width @@ -1485,6 +1522,7 @@ struct cfg80211_s1g_short_beacon { * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @eht_cap: EHT capabilities (or %NULL if EHT isn't enabled) * @eht_oper: EHT operation IE (or %NULL if EHT isn't enabled) + * @uhr_oper: UHR operation (or %NULL if UHR isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT * @twt_responder: Enable Target Wait Time @@ -1524,6 +1562,7 @@ struct cfg80211_ap_settings { const struct ieee80211_he_operation *he_oper; const struct ieee80211_eht_cap_elem *eht_cap; const struct ieee80211_eht_operation *eht_oper; + const struct ieee80211_uhr_operation *uhr_oper; bool ht_required, vht_required, he_required, sae_h2e_required; bool twt_responder; u32 flags; @@ -1697,6 +1736,8 @@ struct sta_txpwr { * @eht_capa: EHT capabilities of station * @eht_capa_len: the length of the EHT capabilities * @s1g_capa: S1G capabilities of station + * @uhr_capa: UHR capabilities of the station + * @uhr_capa_len: the length of the UHR capabilities */ struct link_station_parameters { const u8 *mld_mac; @@ -1716,6 +1757,8 @@ struct link_station_parameters { const struct ieee80211_eht_cap_elem *eht_capa; u8 eht_capa_len; const struct ieee80211_s1g_cap *s1g_capa; + const struct ieee80211_uhr_cap *uhr_capa; + u8 uhr_capa_len; }; /** @@ -1784,6 +1827,7 @@ struct cfg80211_ttlm_params { * present/updated * @eml_cap: EML capabilities of this station * @link_sta_params: link related params. + * @epp_peer: EPP peer indication */ struct station_parameters { struct net_device *vlan; @@ -1810,6 +1854,7 @@ struct station_parameters { bool eml_cap_present; u16 eml_cap; struct link_station_parameters link_sta_params; + bool epp_peer; }; /** @@ -1895,6 +1940,11 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information * @RATE_INFO_FLAGS_S1G_MCS: MCS field filled with S1G MCS + * @RATE_INFO_FLAGS_UHR_MCS: UHR MCS information + * @RATE_INFO_FLAGS_UHR_ELR_MCS: UHR ELR MCS was used + * (set together with @RATE_INFO_FLAGS_UHR_MCS) + * @RATE_INFO_FLAGS_UHR_IM: UHR Interference Mitigation + * was used */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1906,6 +1956,9 @@ enum rate_info_flags { RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), RATE_INFO_FLAGS_EHT_MCS = BIT(7), RATE_INFO_FLAGS_S1G_MCS = BIT(8), + RATE_INFO_FLAGS_UHR_MCS = BIT(9), + RATE_INFO_FLAGS_UHR_ELR_MCS = BIT(10), + RATE_INFO_FLAGS_UHR_IM = BIT(11), }; /** @@ -1921,7 +1974,7 @@ enum rate_info_flags { * @RATE_INFO_BW_160: 160 MHz bandwidth * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation * @RATE_INFO_BW_320: 320 MHz bandwidth - * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation + * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT/UHR RU allocation * @RATE_INFO_BW_1: 1 MHz bandwidth * @RATE_INFO_BW_2: 2 MHz bandwidth * @RATE_INFO_BW_4: 4 MHz bandwidth @@ -1952,7 +2005,7 @@ enum rate_info_bw { * * @flags: bitflag of flags from &enum rate_info_flags * @legacy: bitrate in 100kbit/s for 802.11abg - * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G rate + * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G/UHR rate * @nss: number of streams (VHT & HE only) * @bw: bandwidth (from &enum rate_info_bw) * @he_gi: HE guard interval (from &enum nl80211_he_gi) @@ -3220,8 +3273,6 @@ struct cfg80211_auth_request { * if this is %NULL for a link, that link is not requested * @elems: extra elements for the per-STA profile for this link * @elems_len: length of the elements - * @disabled: If set this link should be included during association etc. but it - * should not be used until enabled by the AP MLD. * @error: per-link error code, must be <= 0. If there is an error, then the * operation as a whole must fail. */ @@ -3229,7 +3280,6 @@ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; - bool disabled; int error; }; @@ -3262,6 +3312,7 @@ struct cfg80211_ml_reconf_req { * Drivers shall disable MLO features for the current association if this * flag is not set. * @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any) + * @ASSOC_REQ_DISABLE_UHR: Disable UHR */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -3272,6 +3323,7 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_EHT = BIT(5), CONNECT_REQ_MLO_SUPPORT = BIT(6), ASSOC_REQ_SPP_AMSDU = BIT(7), + ASSOC_REQ_DISABLE_UHR = BIT(8), }; /** @@ -4189,6 +4241,7 @@ struct cfg80211_ftm_responder_stats { * @num_bursts_exp: actual number of bursts exponent negotiated * @burst_duration: actual burst duration negotiated * @ftms_per_burst: actual FTMs per burst negotiated + * @burst_period: actual burst period negotiated in units of 100ms * @lci_len: length of LCI information (if present) * @civicloc_len: length of civic location information (if present) * @lci: LCI data (may be %NULL) @@ -4230,6 +4283,7 @@ struct cfg80211_pmsr_ftm_result { u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; + u16 burst_period; s32 rssi_avg; s32 rssi_spread; struct rate_info tx_rate, rx_rate; @@ -4292,7 +4346,9 @@ struct cfg80211_pmsr_result { * @burst_period: burst period to use * @asap: indicates to use ASAP mode * @num_bursts_exp: number of bursts exponent - * @burst_duration: burst duration + * @burst_duration: burst duration. If @trigger_based or @non_trigger_based is + * set, this is the burst duration in milliseconds, and zero means the + * device should pick an appropriate value based on @ftms_per_burst. * @ftms_per_burst: number of FTMs per burst * @ftmr_retries: number of retries for FTM request * @request_lci: request LCI information @@ -4305,6 +4361,8 @@ struct cfg80211_pmsr_result { * EDCA based ranging will be used. * @lmr_feedback: negotiate for I2R LMR feedback. Only valid if either * @trigger_based or @non_trigger_based is set. + * @rsta: Operate as the RSTA in the measurement. Only valid if @lmr_feedback + * and either @trigger_based or @non_trigger_based is set. * @bss_color: the bss color of the responder. Optional. Set to zero to * indicate the driver should set the BSS color. Only valid if * @non_trigger_based or @trigger_based is set. @@ -4320,7 +4378,8 @@ struct cfg80211_pmsr_ftm_request_peer { request_civicloc:1, trigger_based:1, non_trigger_based:1, - lmr_feedback:1; + lmr_feedback:1, + rsta:1; u8 num_bursts_exp; u8 burst_duration; u8 ftms_per_burst; @@ -5640,6 +5699,18 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type); * not limited) * @ftm.trigger_based: trigger based ranging measurement is supported * @ftm.non_trigger_based: non trigger based ranging measurement is supported + * @ftm.support_6ghz: supports ranging in 6 GHz band + * @ftm.max_tx_ltf_rep: maximum number of TX LTF repetitions supported (0 means + * only one LTF, no repetitions) + * @ftm.max_rx_ltf_rep: maximum number of RX LTF repetitions supported (0 means + * only one LTF, no repetitions) + * @ftm.max_tx_sts: maximum number of TX STS supported (zero based) + * @ftm.max_rx_sts: maximum number of RX STS supported (zero based) + * @ftm.max_total_ltf_tx: maximum total number of LTFs that can be transmitted + * (0 means unknown) + * @ftm.max_total_ltf_rx: maximum total number of LTFs that can be received + * (0 means unknown) + * @ftm.support_rsta: supports operating as RSTA in PMSR FTM request */ struct cfg80211_pmsr_capabilities { unsigned int max_peers; @@ -5657,7 +5728,15 @@ struct cfg80211_pmsr_capabilities { request_lci:1, request_civicloc:1, trigger_based:1, - non_trigger_based:1; + non_trigger_based:1, + support_6ghz:1; + u8 max_tx_ltf_rep; + u8 max_rx_ltf_rep; + u8 max_tx_sts; + u8 max_rx_sts; + u8 max_total_ltf_tx; + u8 max_total_ltf_rx; + u8 support_rsta:1; } ftm; }; @@ -5683,9 +5762,13 @@ struct wiphy_iftype_akm_suites { * * @rts_threshold: RTS threshold (dot11RTSThreshold); * -1 (default) = RTS/CTS disabled + * @radio_debugfsdir: Pointer to debugfs directory containing the radio- + * specific parameters. + * NULL (default) = Debugfs directory not created */ struct wiphy_radio_cfg { u32 rts_threshold; + struct dentry *radio_debugfsdir; }; /** @@ -6961,6 +7044,19 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) } /** + * ieee80211_radio_freq_range_valid - Check if the radio supports the + * specified frequency range + * + * @radio: wiphy radio + * @freq: the frequency (in KHz) to be queried + * @width: the bandwidth (in KHz) to be queried + * + * Return: whether or not the given frequency range is valid for the given radio + */ +bool ieee80211_radio_freq_range_valid(const struct wiphy_radio *radio, + u32 freq, u32 width); + +/** * cfg80211_radio_chandef_valid - Check if the radio supports the chandef * * @radio: wiphy radio @@ -9770,6 +9866,21 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy, const struct ieee80211_channel *chan); +/** + * cfg80211_stop_link - stop AP/P2P_GO link if link_id is non-negative or stops + * all links on the interface. + * + * @wiphy: the wiphy + * @wdev: wireless device + * @link_id: valid link ID in case of MLO AP/P2P_GO Operation or else -1 + * @gfp: context flags + * + * If link_id is set during MLO operation, stops only the specified AP/P2P_GO + * link and if link_id is set to -1 or last link is stopped, the entire + * interface is stopped as if AP was stopped, IBSS/mesh left, STA disconnected. + */ +void cfg80211_stop_link(struct wiphy *wiphy, struct wireless_dev *wdev, + int link_id, gfp_t gfp); /** * cfg80211_stop_iface - trigger interface disconnection @@ -9783,8 +9894,11 @@ int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy, * * Note: This doesn't need any locks and is asynchronous. */ -void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, - gfp_t gfp); +static inline void +cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp) +{ + cfg80211_stop_link(wiphy, wdev, -1, gfp); +} /** * cfg80211_shutdown_all_interfaces - shut down all interfaces for a wiphy @@ -10118,6 +10232,36 @@ static inline int cfg80211_color_change_notify(struct net_device *dev, } /** + * cfg80211_6ghz_power_type - determine AP regulatory power type + * @control: control flags + * @client_flags: &enum ieee80211_channel_flags for station mode to enable + * SP to LPI fallback, zero otherwise. + * + * Return: regulatory power type from &enum ieee80211_ap_reg_power + */ +static inline enum ieee80211_ap_reg_power +cfg80211_6ghz_power_type(u8 control, u32 client_flags) +{ + switch (u8_get_bits(control, IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { + case IEEE80211_6GHZ_CTRL_REG_LPI_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP: + case IEEE80211_6GHZ_CTRL_REG_AP_ROLE_NOT_RELEVANT: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD: + return IEEE80211_REG_LPI_AP; + case IEEE80211_6GHZ_CTRL_REG_SP_AP: + return IEEE80211_REG_SP_AP; + case IEEE80211_6GHZ_CTRL_REG_VLP_AP: + return IEEE80211_REG_VLP_AP; + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: + if (client_flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT) + return IEEE80211_REG_LPI_AP; + return IEEE80211_REG_SP_AP; + default: + return IEEE80211_REG_UNSET_AP; + } +} + +/** * cfg80211_links_removed - Notify about removed STA MLD setup links. * @dev: network device. * @link_mask: BIT mask of removed STA MLD setup link IDs. diff --git a/include/net/devlink.h b/include/net/devlink.h index 9e824f61e40f..cb839e0435a1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -479,6 +479,10 @@ struct devlink_flash_notify { * @set: set parameter value, used for runtime and permanent * configuration modes * @validate: validate input value is applicable (within value range, etc.) + * @get_default: get parameter default value, used for runtime and permanent + * configuration modes + * @reset_default: reset parameter to default value, used for runtime and permanent + * configuration modes * * This struct should be used by the driver to fill the data for * a parameter it registers. @@ -490,13 +494,20 @@ struct devlink_param { enum devlink_param_type type; unsigned long supported_cmodes; int (*get)(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx); + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); int (*set)(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx, struct netlink_ext_ack *extack); int (*validate)(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack); + int (*get_default)(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); + int (*reset_default)(struct devlink *devlink, u32 id, + enum devlink_param_cmode cmode, + struct netlink_ext_ack *extack); }; struct devlink_param_item { @@ -508,6 +519,7 @@ struct devlink_param_item { * until reload. */ bool driverinit_value_new_valid; + union devlink_param_value driverinit_default; }; enum devlink_param_generic_id { @@ -532,6 +544,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS, DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS, + DEVLINK_PARAM_GENERIC_ID_MAX_MAC_PER_VF, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -602,6 +615,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_NAME "num_doorbells" #define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_NAME "max_mac_per_vf" +#define DEVLINK_PARAM_GENERIC_MAX_MAC_PER_VF_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -625,6 +641,37 @@ enum devlink_param_generic_id { .validate = _validate, \ } +#define DEVLINK_PARAM_GENERIC_WITH_DEFAULTS(_id, _cmodes, _get, _set, \ + _validate, _get_default, \ + _reset_default) \ +{ \ + .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ + .name = DEVLINK_PARAM_GENERIC_##_id##_NAME, \ + .type = DEVLINK_PARAM_GENERIC_##_id##_TYPE, \ + .generic = true, \ + .supported_cmodes = _cmodes, \ + .get = _get, \ + .set = _set, \ + .validate = _validate, \ + .get_default = _get_default, \ + .reset_default = _reset_default, \ +} + +#define DEVLINK_PARAM_DRIVER_WITH_DEFAULTS(_id, _name, _type, _cmodes, \ + _get, _set, _validate, \ + _get_default, _reset_default) \ +{ \ + .id = _id, \ + .name = _name, \ + .type = _type, \ + .supported_cmodes = _cmodes, \ + .get = _get, \ + .set = _set, \ + .validate = _validate, \ + .get_default = _get_default, \ + .reset_default = _reset_default, \ +} + /* Identifier of board design */ #define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID "board.id" /* Revision of board design */ diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 58d91ccc56e0..a7b7abd66e21 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -67,6 +67,7 @@ FN(TC_EGRESS) \ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ + FN(QDISC_BURST_DROP) \ FN(QDISC_OVERLIMIT) \ FN(QDISC_CONGESTED) \ FN(CAKE_FLOOD) \ @@ -375,6 +376,11 @@ enum skb_drop_reason { */ SKB_DROP_REASON_QDISC_DROP, /** + * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when net.core.qdisc_max_burst + * limit is hit. + */ + SKB_DROP_REASON_QDISC_BURST_DROP, + /** * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc * instance exceeds its total buffer size limit. */ diff --git a/include/net/dsa.h b/include/net/dsa.h index d73ea0880066..6c17446f3dcc 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -55,6 +55,9 @@ struct tc_action; #define DSA_TAG_PROTO_LAN937X_VALUE 27 #define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 #define DSA_TAG_PROTO_BRCM_LEGACY_FCS_VALUE 29 +#define DSA_TAG_PROTO_YT921X_VALUE 30 +#define DSA_TAG_PROTO_MXL_GSW1XX_VALUE 31 +#define DSA_TAG_PROTO_MXL862_VALUE 32 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -87,6 +90,9 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, + DSA_TAG_PROTO_YT921X = DSA_TAG_PROTO_YT921X_VALUE, + DSA_TAG_PROTO_MXL_GSW1XX = DSA_TAG_PROTO_MXL_GSW1XX_VALUE, + DSA_TAG_PROTO_MXL862 = DSA_TAG_PROTO_MXL862_VALUE, }; struct dsa_switch; @@ -212,12 +218,6 @@ struct dsa_mall_mirror_tc_entry { bool ingress; }; -/* TC port policer entry */ -struct dsa_mall_policer_tc_entry { - u32 burst; - u64 rate_bytes_per_sec; -}; - /* TC matchall entry */ struct dsa_mall_tc_entry { struct list_head list; @@ -225,7 +225,7 @@ struct dsa_mall_tc_entry { enum dsa_port_mall_action_type type; union { struct dsa_mall_mirror_tc_entry mirror; - struct dsa_mall_policer_tc_entry policer; + struct flow_action_police policer; }; }; @@ -298,6 +298,7 @@ struct dsa_port { struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; + netdevice_tracker conduit_tracker; struct dsa_lag *lag; struct net_device *hsr_dev; @@ -1105,7 +1106,7 @@ struct dsa_switch_ops { void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, - struct dsa_mall_policer_tc_entry *policer); + const struct flow_action_police *policer); void (*port_policer_del)(struct dsa_switch *ds, int port); int (*port_setup_tc)(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data); @@ -1247,7 +1248,8 @@ struct dsa_switch_ops { dsa_devlink_param_get, dsa_devlink_param_set, NULL) int dsa_devlink_param_get(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx); + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack); int dsa_devlink_param_set(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx, struct netlink_ext_ack *extack); @@ -1310,11 +1312,6 @@ static inline int dsa_devlink_port_to_port(struct devlink_port *port) return port->index; } -struct dsa_switch_driver { - struct list_head list; - const struct dsa_switch_ops *ops; -}; - bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); @@ -1322,6 +1319,15 @@ bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); +int dsa_port_simple_hsr_validate(struct dsa_switch *ds, int port, + struct net_device *hsr, + struct netlink_ext_ack *extack); +int dsa_port_simple_hsr_join(struct dsa_switch *ds, int port, + struct net_device *hsr, + struct netlink_ext_ack *extack); +int dsa_port_simple_hsr_leave(struct dsa_switch *ds, int port, + struct net_device *hsr); + /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) { diff --git a/include/net/dst.h b/include/net/dst.h index f8aa1239b4db..307073eae7f8 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -219,6 +219,12 @@ static inline u32 dst_mtu(const struct dst_entry *dst) return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); } +/* Variant of dst_mtu() for IPv4 users. */ +static inline u32 dst4_mtu(const struct dst_entry *dst) +{ + return INDIRECT_CALL_1(dst->ops->mtu, ipv4_mtu, dst); +} + /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric) { diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 596ab9791e4d..70a02ee14308 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -231,6 +231,21 @@ struct flow_action_cookie *flow_action_cookie_create(void *data, gfp_t gfp); void flow_action_cookie_destroy(struct flow_action_cookie *cookie); +struct flow_action_police { + u32 burst; + u64 rate_bytes_ps; + u64 peakrate_bytes_ps; + u32 avrate; + u16 overhead; + u64 burst_pkt; + u64 rate_pkt_ps; + u32 mtu; + struct { + enum flow_action_id act_id; + u32 extval; + } exceed, notexceed; +}; + struct flow_action_entry { enum flow_action_id id; u32 hw_index; @@ -275,20 +290,7 @@ struct flow_action_entry { u32 trunc_size; bool truncate; } sample; - struct { /* FLOW_ACTION_POLICE */ - u32 burst; - u64 rate_bytes_ps; - u64 peakrate_bytes_ps; - u32 avrate; - u16 overhead; - u64 burst_pkt; - u64 rate_pkt_ps; - u32 mtu; - struct { - enum flow_action_id act_id; - u32 extval; - } exceed, notexceed; - } police; + struct flow_action_police police; /* FLOW_ACTION_POLICE */ struct { /* FLOW_ACTION_CT */ int action; u16 zone; @@ -526,7 +528,7 @@ static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags, * * Return: true if control flags are set, false otherwise. */ -static inline bool flow_rule_match_has_control_flags(struct flow_rule *rule, +static inline bool flow_rule_match_has_control_flags(const struct flow_rule *rule, struct netlink_ext_ack *extack) { struct flow_match_control match; @@ -718,7 +720,7 @@ struct flow_offload_action { struct flow_offload_action *offload_action_alloc(unsigned int num_actions); static inline struct flow_rule * -flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) +flow_cls_offload_flow_rule(const struct flow_cls_offload *flow_cmd) { return flow_cmd->rule; } diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 9467e33dfb36..8aca881937da 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -358,7 +358,7 @@ static int fq_init(struct fq *fq, int flows_cnt) fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ - fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + fq->flows = kvzalloc_objs(fq->flows[0], fq->flows_cnt); if (!fq->flows) return -ENOMEM; diff --git a/include/net/gro.h b/include/net/gro.h index e3affb2e2ca8..2300b6da05b2 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -405,9 +405,8 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); +struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *); +int udp6_gro_complete(struct sk_buff *, int); #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ @@ -593,4 +592,31 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int * struct packet_offload *gro_find_receive_by_type(__be16 type); struct packet_offload *gro_find_complete_by_type(__be16 type); +static inline struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb) +{ + unsigned int thlen, hlen, off; + struct tcphdr *th; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header(skb, hlen, off); + if (unlikely(!th)) + return NULL; + + thlen = th->doff * 4; + if (unlikely(thlen < sizeof(*th))) + return NULL; + + hlen = off + thlen; + if (!skb_gro_may_pull(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + return NULL; + } + + skb_gro_pull(skb, thlen); + + return th; +} + #endif /* _NET_GRO_H */ diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 4acec191c54a..6632b1aa7584 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -42,6 +42,7 @@ struct net_hotdata { int netdev_budget_usecs; int tstamp_prequeue; int max_backlog; + int qdisc_max_burst; int dev_tx_weight; int dev_rx_weight; int sysctl_max_skb_frags; diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 813e163ce27c..c60867e7e43c 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017 Intel Deutschland GmbH - * Copyright (c) 2018-2019, 2021-2022 Intel Corporation + * Copyright (c) 2018-2019, 2021-2022, 2025 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -202,6 +202,24 @@ enum ieee80211_radiotap_vht_coding { IEEE80211_RADIOTAP_CODING_LDPC_USER3 = 0x08, }; +enum ieee80211_radiotap_vht_bandwidth { + /* Note: more values are defined but can't really be used */ + IEEE80211_RADIOTAP_VHT_BW_20 = 0, + IEEE80211_RADIOTAP_VHT_BW_40 = 1, + IEEE80211_RADIOTAP_VHT_BW_80 = 4, + IEEE80211_RADIOTAP_VHT_BW_160 = 11, +}; + +struct ieee80211_radiotap_vht { + __le16 known; + u8 flags; + u8 bandwidth; + u8 mcs_nss[4]; + u8 coding; + u8 group_id; + __le16 partial_aid; +} __packed; + /* for IEEE80211_RADIOTAP_TIMESTAMP */ enum ieee80211_radiotap_timestamp_unit_spos { IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK = 0x000F, diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 745891d2e113..ece8dabd209a 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -18,7 +18,9 @@ struct sk_buff; struct sock; struct sockaddr; -struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, +struct dst_entry *inet6_csk_route_req(const struct sock *sk, + struct dst_entry *dst, + struct flowi6 *fl6, const struct request_sock *req, u8 proto); int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 282e29237d93..c16de5b7963f 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk, { if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; diff --git a/include/net/inet_common.h b/include/net/inet_common.h index c17a6585d0b0..5dd2bf24449e 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -19,15 +19,14 @@ struct msghdr; struct net; struct page; struct sock; -struct sockaddr; struct socket; int inet_release(struct socket *sock); -int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, +int inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags); -int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, +int __inet_stream_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags, int is_sendmsg); -int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, +int inet_dgram_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg); @@ -42,8 +41,8 @@ int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); int __inet_listen_sk(struct sock *sk, int backlog); void inet_sock_destruct(struct sock *sk); -int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); -int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int inet_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); +int inet_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); /* Don't allocate port at this moment, defer to connect. */ #define BIND_FORCE_ADDRESS_NO_PORT (1 << 0) /* Grab and release socket lock. */ @@ -52,7 +51,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); #define BIND_FROM_BPF (1 << 2) /* Skip CAP_NET_BIND_SERVICE check. */ #define BIND_NO_CAP_NET_BIND_SERVICE (1 << 3) -int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, +int __inet_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags); int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b4b886647607..5cb3056d6ddc 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -42,7 +42,9 @@ struct inet_connection_sock_af_ops { struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); u16 net_header_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); @@ -56,7 +58,9 @@ struct inet_connection_sock_af_ops { * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node * @icsk_bind2_hash: Bind node in the bhash2 table - * @icsk_retransmit_timer: Resend (no ack) + * @icsk_delack_timer: Delayed ACK timer + * @icsk_keepalive_timer: Keepalive timer + * @mptcp_tout_timer: mptcp timer * @icsk_rto: Retransmit timeout * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook @@ -81,8 +85,11 @@ struct inet_connection_sock { struct request_sock_queue icsk_accept_queue; struct inet_bind_bucket *icsk_bind_hash; struct inet_bind2_bucket *icsk_bind2_hash; - struct timer_list icsk_retransmit_timer; - struct timer_list icsk_delack_timer; + struct timer_list icsk_delack_timer; + union { + struct timer_list icsk_keepalive_timer; + struct timer_list mptcp_tout_timer; + }; __u32 icsk_rto; __u32 icsk_rto_min; u32 icsk_rto_max; @@ -184,10 +191,9 @@ static inline void inet_csk_delack_init(struct sock *sk) memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } -static inline unsigned long -icsk_timeout(const struct inet_connection_sock *icsk) +static inline unsigned long tcp_timeout_expires(const struct sock *sk) { - return READ_ONCE(icsk->icsk_retransmit_timer.expires); + return READ_ONCE(sk->tcp_retransmit_timer.expires); } static inline unsigned long @@ -203,7 +209,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { smp_store_release(&icsk->icsk_pending, 0); #ifdef INET_CSK_CLEAR_TIMERS - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer(sk, &sk->tcp_retransmit_timer); #endif } else if (what == ICSK_TIME_DACK) { smp_store_release(&icsk->icsk_ack.pending, 0); @@ -235,7 +241,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { smp_store_release(&icsk->icsk_pending, what); - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, when); + sk_reset_timer(sk, &sk->tcp_retransmit_timer, when); } else if (what == ICSK_TIME_DACK) { smp_store_release(&icsk->icsk_ack.pending, icsk->icsk_ack.pending | ICSK_ACK_TIMER); @@ -267,8 +273,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); -bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - unsigned long timeout); +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, bool own_req); @@ -291,14 +296,6 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); -static inline unsigned long -reqsk_timeout(struct request_sock *req, unsigned long max_timeout) -{ - u64 timeout = (u64)req->timeout << req->num_timeout; - - return (unsigned long)min_t(u64, timeout, max_timeout); -} - void inet_csk_destroy_sock(struct sock *sk); void inet_csk_prepare_for_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index ea32393464a2..827b87a95dab 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) return outer; } +/* Apply either ECT(0) or ECT(1) */ +static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1) +{ + __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0; + + /* Mask the complete byte in case the connection alternates between + * ECT(0) and ECT(1). + */ + inet_sk(sk)->tos &= ~INET_ECN_MASK; + inet_sk(sk)->tos |= ect; + if (inet6_sk(sk)) { + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; + inet6_sk(sk)->tclass |= ect; + } +} + static inline void INET_ECN_xmit(struct sock *sk) { - inet_sk(sk)->tos |= INET_ECN_ECT_0; - if (inet6_sk(sk) != NULL) - inet6_sk(sk)->tclass |= INET_ECN_ECT_0; + __INET_ECN_xmit(sk, false); } static inline void INET_ECN_dontxmit(struct sock *sk) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0eccd9c3a883..365925c9d262 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -123,27 +123,15 @@ void inet_frags_fini(struct inet_frags *); int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); -static inline void fqdir_pre_exit(struct fqdir *fqdir) -{ - /* Prevent creation of new frags. - * Pairs with READ_ONCE() in inet_frag_find(). - */ - WRITE_ONCE(fqdir->high_thresh, 0); - - /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() - * and ip6frag_expire_frag_queue(). - */ - WRITE_ONCE(fqdir->dead, true); -} +void fqdir_pre_exit(struct fqdir *fqdir); void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q, int *refs); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); -/* Free all skbs in the queue; return the sum of their truesizes. */ -unsigned int inet_frag_rbtree_purge(struct rb_root *root, - enum skb_drop_reason reason); +void inet_frag_queue_flush(struct inet_frag_queue *q, + enum skb_drop_reason reason); static inline void inet_frag_putn(struct inet_frag_queue *q, int refs) { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ac05a52d9e13..5a979dcab538 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -345,7 +345,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, int dif, int sdif) { if (!net_eq(sock_net(sk), net) || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || sk->sk_addrpair != cookie) return false; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1086256549fa..7cdcbed3e5cb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -26,6 +26,8 @@ #include <net/tcp_states.h> #include <net/l3mdev.h> +#define IP_OPTIONS_DATA_FIXED_SIZE 40 + /** struct ip_options - IP Options * * @faddr - Saved first hop address @@ -58,12 +60,9 @@ struct ip_options { struct ip_options_rcu { struct rcu_head rcu; - struct ip_options opt; -}; -struct ip_options_data { - struct ip_options_rcu opt; - char data[40]; + /* Must be last as it ends in a flexible-array member. */ + struct ip_options opt; }; struct inet_request_sock { @@ -101,10 +100,7 @@ struct inet_request_sock { }; }; -static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) -{ - return (struct inet_request_sock *)sk; -} +#define inet_rsk(ptr) container_of_const(ptr, struct inet_request_sock, req) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { @@ -163,6 +159,13 @@ static inline bool inet_sk_bound_dev_eq(const struct net *net, #endif } +struct inet6_cork { + struct ipv6_txoptions *opt; + u8 hop_limit; + u8 tclass; + u8 dontfrag:1; +}; + struct inet_cork { unsigned int flags; __be32 addr; @@ -183,6 +186,9 @@ struct inet_cork { struct inet_cork_full { struct inet_cork base; struct flowi fl; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_cork base6; +#endif }; struct ip_mc_socklist; @@ -214,6 +220,7 @@ struct inet_sock { struct sock sk; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6; + struct ipv6_fl_socklist __rcu *ipv6_fl_list; #endif /* Socket demultiplex comparisons on incoming packets. */ #define inet_daddr sk.__sk_common.skc_daddr @@ -354,14 +361,6 @@ static inline struct sock *skb_to_full_sk(const struct sk_buff *skb) #define inet_sk(ptr) container_of_const(ptr, struct inet_sock, sk) -static inline void __inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from, - const int ancestor_size) -{ - memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, - sk_from->sk_prot->obj_size - ancestor_size); -} - int inet_sk_rebuild_header(struct sock *sk); /** diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 2cbbee6e806a..a75912fe247e 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -60,6 +60,8 @@ void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_trace_hdr *trace, bool is_input); +u8 ioam6_trace_compute_nodelen(u32 trace_type); + int ioam6_init(void); void ioam6_exit(void); diff --git a/include/net/ip.h b/include/net/ip.h index 380afb691c41..7f9abd457e01 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; - ipcm->protocol = inet->inet_num; + ipcm->protocol = READ_ONCE(inet->inet_num); } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) @@ -261,8 +261,8 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, } /* datagram.c */ -int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int __ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); +int ip4_datagram_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7c5512baa4b2..a55f9bf95fe3 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -266,6 +266,12 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); +/* Variant of dst_mtu() for IPv6 users */ +static inline u32 dst6_mtu(const struct dst_entry *dst) +{ + return INDIRECT_CALL_1(dst->ops->mtu, ip6_mtu, dst); +} + static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 120db2865811..359b595f1df9 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -156,6 +156,18 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { + if (dev) { + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + DEV_STATS_INC(dev, tx_errors); + } + kfree_skb(skb); + return; + } + + dev_xmit_recursion_inc(); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->flags = ip6cb_flags; pkt_len = skb->len - skb_inner_network_offset(skb); @@ -166,6 +178,8 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, pkt_len = -1; iptunnel_xmit_stats(dev, pkt_len); } + + dev_xmit_recursion_dec(); } #endif #endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b4495c38e0a0..318593743b6e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, siphash_aligned_key_t hash_key; u32 mp_seed; - mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed); fib_multipath_hash_construct_key(&hash_key, mp_seed); return flow_hash_from_keys_seed(keys, &hash_key); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index ecae35512b9b..1f577a4f8ce9 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -19,6 +19,7 @@ #include <net/rtnetlink.h> #include <net/lwtunnel.h> #include <net/dst_cache.h> +#include <net/netdev_lock.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -26,6 +27,13 @@ #include <net/ip6_route.h> #endif +/* Recursion limit for tunnel xmit to detect routing loops. + * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel + * recursion involves route lookups and full IP output, consuming much + * more stack per level, so a lower limit is needed. + */ +#define IP_TUNNEL_RECURSION_LIMIT 4 + /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) @@ -372,7 +380,17 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->flowi4_flags = flow_flags; } -int ip_tunnel_init(struct net_device *dev); +int __ip_tunnel_init(struct net_device *dev); +#define ip_tunnel_init(DEV) \ +({ \ + struct net_device *__dev = (DEV); \ + int __res = __ip_tunnel_init(__dev); \ + \ + if (!__res) \ + netdev_lockdep_set_classes(__dev);\ + __res; \ +}) + void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); struct net *ip_tunnel_get_link_net(const struct net_device *dev); @@ -647,13 +665,29 @@ static inline int iptunnel_pull_offloads(struct sk_buff *skb) static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); - - u64_stats_update_begin(&tstats->syncp); - u64_stats_add(&tstats->tx_bytes, pkt_len); - u64_stats_inc(&tstats->tx_packets); - u64_stats_update_end(&tstats->syncp); - put_cpu_ptr(tstats); + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) { + struct pcpu_dstats *dstats = get_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_add(&dstats->tx_bytes, pkt_len); + u64_stats_inc(&dstats->tx_packets); + u64_stats_update_end(&dstats->syncp); + put_cpu_ptr(dstats); + return; + } + if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) { + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + u64_stats_add(&tstats->tx_bytes, pkt_len); + u64_stats_inc(&tstats->tx_packets); + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); + return; + } + pr_err_once("iptunnel_xmit_stats pcpu_stat_type=%d\n", + dev->pcpu_stat_type); + WARN_ON_ONCE(1); return; } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2ccdf85f34f1..53c5056508be 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -25,8 +25,6 @@ struct ip_tunnel_info; #define SIN6_LEN_RFC2133 24 -#define IPV6_MAXPLEN 65535 - /* * NextHeader field of IPv6 header */ @@ -151,17 +149,6 @@ struct frag_hdr { __be32 identification; }; -/* - * Jumbo payload option, as described in RFC 2675 2. - */ -struct hop_jumbo_hdr { - u8 nexthdr; - u8 hdrlen; - u8 tlv_type; /* IPV6_TLV_JUMBO, 0xC2 */ - u8 tlv_len; /* 4 */ - __be32 jumbo_payload_len; -}; - #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 @@ -464,72 +451,6 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); -/* This helper is specialized for BIG TCP needs. - * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. - * It assumes headers are already in skb->head. - * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there. - */ -static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) -{ - const struct hop_jumbo_hdr *jhdr; - const struct ipv6hdr *nhdr; - - if (likely(skb->len <= GRO_LEGACY_MAX_SIZE)) - return 0; - - if (skb->protocol != htons(ETH_P_IPV6)) - return 0; - - if (skb_network_offset(skb) + - sizeof(struct ipv6hdr) + - sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) - return 0; - - nhdr = ipv6_hdr(skb); - - if (nhdr->nexthdr != NEXTHDR_HOP) - return 0; - - jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); - if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || - jhdr->nexthdr != IPPROTO_TCP) - return 0; - return jhdr->nexthdr; -} - -/* Return 0 if HBH header is successfully removed - * Or if HBH removal is unnecessary (packet is not big TCP) - * Return error to indicate dropping the packet - */ -static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) -{ - const int hophdr_len = sizeof(struct hop_jumbo_hdr); - int nexthdr = ipv6_has_hopopt_jumbo(skb); - struct ipv6hdr *h6; - - if (!nexthdr) - return 0; - - if (skb_cow_head(skb, 0)) - return -1; - - /* Remove the HBH header. - * Layout: [Ethernet header][IPv6 header][HBH][L4 Header] - */ - memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb), - skb_network_header(skb) - skb_mac_header(skb) + - sizeof(struct ipv6hdr)); - - __skb_pull(skb, hophdr_len); - skb->network_header += hophdr_len; - skb->mac_header += hophdr_len; - - h6 = ipv6_hdr(skb); - h6->nexthdr = nexthdr; - - return 0; -} - static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); @@ -931,10 +852,10 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, #if IS_ENABLED(CONFIG_IPV6) -static inline bool ipv6_can_nonlocal_bind(struct net *net, - struct inet_sock *inet) +static inline bool ipv6_can_nonlocal_bind(const struct net *net, + const struct inet_sock *inet) { - return net->ipv6.sysctl.ip_nonlocal_bind || + return READ_ONCE(net->ipv6.sysctl.ip_nonlocal_bind) || test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) || test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags); } @@ -949,10 +870,12 @@ static inline bool ipv6_can_nonlocal_bind(struct net *net, #define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OPTOUT -static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, +static inline __be32 ip6_make_flowlabel(const struct net *net, + struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { + u8 auto_flowlabels; u32 hash; /* @flowlabel may include more than a flow label, eg, the traffic class. @@ -960,10 +883,12 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, */ flowlabel &= IPV6_FLOWLABEL_MASK; - if (flowlabel || - net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || - (!autolabel && - net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) + if (flowlabel) + return flowlabel; + + auto_flowlabels = READ_ONCE(net->ipv6.sysctl.auto_flowlabels); + if (auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF || + (!autolabel && auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED)) return flowlabel; hash = skb_get_hash_flowi6(skb, fl6); @@ -976,15 +901,15 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; - if (net->ipv6.sysctl.flowlabel_state_ranges) + if (READ_ONCE(net->ipv6.sysctl.flowlabel_state_ranges)) flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; return flowlabel; } -static inline int ip6_default_np_autolabel(struct net *net) +static inline int ip6_default_np_autolabel(const struct net *net) { - switch (net->ipv6.sysctl.auto_flowlabels) { + switch (READ_ONCE(net->ipv6.sysctl.auto_flowlabels)) { case IP6_AUTO_FLOW_LABEL_OFF: case IP6_AUTO_FLOW_LABEL_OPTIN: default: @@ -995,13 +920,13 @@ static inline int ip6_default_np_autolabel(struct net *net) } } #else -static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, +static inline __be32 ip6_make_flowlabel(const struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel, struct flowi6 *fl6) { return flowlabel; } -static inline int ip6_default_np_autolabel(struct net *net) +static inline int ip6_default_np_autolabel(const struct net *net) { return 0; } @@ -1010,11 +935,11 @@ static inline int ip6_default_np_autolabel(struct net *net) #if IS_ENABLED(CONFIG_IPV6) static inline int ip6_multipath_hash_policy(const struct net *net) { - return net->ipv6.sysctl.multipath_hash_policy; + return READ_ONCE(net->ipv6.sysctl.multipath_hash_policy); } static inline u32 ip6_multipath_hash_fields(const struct net *net) { - return net->ipv6.sysctl.multipath_hash_fields; + return READ_ONCE(net->ipv6.sysctl.multipath_hash_fields); } #else static inline int ip6_multipath_hash_policy(const struct net *net) @@ -1103,8 +1028,7 @@ void ip6_flush_pending_frames(struct sock *sk); int ip6_send_skb(struct sk_buff *skb); struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, - struct inet_cork_full *cork, - struct inet6_cork *v6_cork); + struct inet_cork_full *cork); struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -1115,8 +1039,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { - return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork, - &inet6_sk(sk)->cork); + return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, @@ -1147,11 +1070,11 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); * Extension header (options) processing */ -void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p, - struct in6_addr *saddr); -void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto); +u8 ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 proto, struct in6_addr **daddr_p, + struct in6_addr *saddr); +u8 ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, + u8 proto); int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, __be16 *frag_offp); @@ -1170,9 +1093,19 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type); -struct in6_addr *fl6_update_dst(struct flowi6 *fl6, - const struct ipv6_txoptions *opt, - struct in6_addr *orig); +struct in6_addr *__fl6_update_dst(struct flowi6 *fl6, + const struct ipv6_txoptions *opt, + struct in6_addr *orig); + +static inline struct in6_addr * +fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, + struct in6_addr *orig) +{ + if (likely(!opt)) + return NULL; + + return __fl6_update_dst(fl6, opt, orig); +} /* * socket options (ipv6_sockglue.c) @@ -1188,10 +1121,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, +int __ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); -int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, +int ip6_datagram_connect(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); +int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); void ip6_datagram_release_cb(struct sock *sk); @@ -1208,8 +1141,8 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); void inet6_cleanup_sock(struct sock *sk); void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); -int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); -int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); +int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); @@ -1280,12 +1213,15 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, static inline int ip6_sock_set_v6only(struct sock *sk) { - if (inet_sk(sk)->inet_num) - return -EINVAL; + int ret = 0; + lock_sock(sk); - sk->sk_ipv6only = true; + if (inet_sk(sk)->inet_num) + ret = -EINVAL; + else + sk->sk_ipv6only = true; release_sock(sk); - return 0; + return ret; } static inline void ip6_sock_set_recverr(struct sock *sk) diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 38ef66826939..41d9fc6965f9 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -69,9 +69,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) int refs = 1; rcu_read_lock(); - /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ - if (READ_ONCE(fq->q.fqdir->dead)) - goto out_rcu_unlock; spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) @@ -80,6 +77,12 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) fq->q.flags |= INET_FRAG_DROP; inet_frag_kill(&fq->q, &refs); + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) { + inet_frag_queue_flush(&fq->q, 0); + goto out; + } + dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) goto out; diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 8a3465c8c2c5..d3013e721b14 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -80,7 +80,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; /* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ struct ipv6_bpf_stub { - int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, + int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags); struct sock *(*udp6_lib_lookup)(const struct net *net, const struct in6_addr *saddr, __be16 sport, diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index 9804fa5d9c67..18f511da9a09 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -195,35 +195,15 @@ struct iucv_handler { struct list_head paths; }; -/** - * iucv_register: - * @handler: address of iucv handler structure - * @smp: != 0 indicates that the handler can deal with out of order messages - * - * Registers a driver with IUCV. - * - * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid - * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus. - */ int iucv_register(struct iucv_handler *handler, int smp); +void iucv_unregister(struct iucv_handler *handler, int smp); /** - * iucv_unregister - * @handler: address of iucv handler structure - * @smp: != 0 indicates that the handler can deal with out of order messages - * - * Unregister driver from IUCV. - */ -void iucv_unregister(struct iucv_handler *handle, int smp); - -/** - * iucv_path_alloc + * iucv_path_alloc - Allocate a new path structure for use with iucv_connect. * @msglim: initial message limit * @flags: initial flags * @gfp: kmalloc allocation flag * - * Allocate a new path structure for use with iucv_connect. - * * Returns: NULL if the memory allocation failed or a pointer to the * path structure. */ @@ -231,7 +211,7 @@ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp) { struct iucv_path *path; - path = kzalloc(sizeof(struct iucv_path), gfp); + path = kzalloc_obj(struct iucv_path, gfp); if (path) { path->msglim = msglim; path->flags = flags; @@ -240,229 +220,48 @@ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp) } /** - * iucv_path_free + * iucv_path_free - Frees a path structure. * @path: address of iucv path structure - * - * Frees a path structure. */ static inline void iucv_path_free(struct iucv_path *path) { kfree(path); } -/** - * iucv_path_accept - * @path: address of iucv path structure - * @handler: address of iucv handler structure - * @userdata: 16 bytes of data reflected to the communication partner - * @private: private data passed to interrupt handlers for this path - * - * This function is issued after the user received a connection pending - * external interrupt and now wishes to complete the IUCV communication path. - * - * Returns: the result of the CP IUCV call. - */ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, u8 *userdata, void *private); -/** - * iucv_path_connect - * @path: address of iucv path structure - * @handler: address of iucv handler structure - * @userid: 8-byte user identification - * @system: 8-byte target system identification - * @userdata: 16 bytes of data reflected to the communication partner - * @private: private data passed to interrupt handlers for this path - * - * This function establishes an IUCV path. Although the connect may complete - * successfully, you are not able to use the path until you receive an IUCV - * Connection Complete external interrupt. - * - * Returns: the result of the CP IUCV call. - */ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, u8 *userid, u8 *system, u8 *userdata, void *private); -/** - * iucv_path_quiesce: - * @path: address of iucv path structure - * @userdata: 16 bytes of data reflected to the communication partner - * - * This function temporarily suspends incoming messages on an IUCV path. - * You can later reactivate the path by invoking the iucv_resume function. - * - * Returns: the result from the CP IUCV call. - */ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata); -/** - * iucv_path_resume: - * @path: address of iucv path structure - * @userdata: 16 bytes of data reflected to the communication partner - * - * This function resumes incoming messages on an IUCV path that has - * been stopped with iucv_path_quiesce. - * - * Returns: the result from the CP IUCV call. - */ int iucv_path_resume(struct iucv_path *path, u8 *userdata); -/** - * iucv_path_sever - * @path: address of iucv path structure - * @userdata: 16 bytes of data reflected to the communication partner - * - * This function terminates an IUCV path. - * - * Returns: the result from the CP IUCV call. - */ int iucv_path_sever(struct iucv_path *path, u8 *userdata); -/** - * iucv_message_purge - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @srccls: source class of message - * - * Cancels a message you have sent. - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, u32 srccls); -/** - * iucv_message_receive - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: flags that affect how the message is received (IUCV_IPBUFLST) - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of data buffer - * @residual: - * - * This function receives messages that are being sent to you over - * established paths. This function will deal with RMDATA messages - * embedded in struct iucv_message as well. - * - * Locking: local_bh_enable/local_bh_disable - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, size_t *residual); -/** - * __iucv_message_receive - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: flags that affect how the message is received (IUCV_IPBUFLST) - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of data buffer - * @residual: - * - * This function receives messages that are being sent to you over - * established paths. This function will deal with RMDATA messages - * embedded in struct iucv_message as well. - * - * Locking: no locking. - * - * Returns: the result from the CP IUCV call. - */ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, size_t *residual); -/** - * iucv_message_reject - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * - * The reject function refuses a specified message. Between the time you - * are notified of a message and the time that you complete the message, - * the message may be rejected. - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg); -/** - * iucv_message_reply - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the reply is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) - * @reply: address of data buffer or address of struct iucv_array - * @size: length of reply data buffer - * - * This function responds to the two-way messages that you receive. You - * must identify completely the message to which you wish to reply. ie, - * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into - * the parameter list. - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *reply, size_t size); -/** - * iucv_message_send - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) - * @srccls: source class of message - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of send buffer - * - * This function transmits data to another application. Data to be - * transmitted is in a buffer and this is a one-way message and the - * receiver will not reply to the message. - * - * Locking: local_bh_enable/local_bh_disable - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); -/** - * __iucv_message_send - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) - * @srccls: source class of message - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of send buffer - * - * This function transmits data to another application. Data to be - * transmitted is in a buffer and this is a one-way message and the - * receiver will not reply to the message. - * - * Locking: no locking. - * - * Returns: the result from the CP IUCV call. - */ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); -/** - * iucv_message_send2way - * @path: address of iucv path structure - * @msg: address of iucv msg structure - * @flags: how the message is sent and the reply is received - * (IUCV_IPRMDATA, IUCV_IPBUFLST, IUCV_IPPRTY, IUCV_ANSLST) - * @srccls: source class of message - * @buffer: address of data buffer or address of struct iucv_array - * @size: length of send buffer - * @ansbuf: address of answer buffer or address of struct iucv_array - * @asize: size of reply buffer - * - * This function transmits data to another application. Data to be - * transmitted is in a buffer. The receiver of the send is expected to - * reply to the message and a buffer is provided into which IUCV moves - * the reply to this message. - * - * Returns: the result from the CP IUCV call. - */ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size, void *answer, size_t asize, size_t *residual); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 1eb8dad18f7e..710e98665eb3 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -207,18 +207,19 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) static inline struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev; + rcu_read_lock(); + dev = skb_dst_dev_rcu(skb); if (netif_is_l3_slave(dev)) { struct net_device *master; - rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); - rcu_read_unlock(); } + rcu_read_unlock(); return skb; } diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 481a7b28e6f2..82b5d21aae87 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, * @pending: current number of XSkFQEs to refill * @thresh: threshold below which the queue is refilled * @buf_len: HW-writeable length per each buffer + * @truesize: step between consecutive buffers, 0 if none exists * @nid: ID of the closest NUMA node with memory */ struct libeth_xskfq { @@ -614,6 +615,8 @@ struct libeth_xskfq { u32 thresh; u32 buf_len; + u32 truesize; + int nid; }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a55085cf4ec4..adce2144a678 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2025 Intel Corporation + * Copyright (C) 2018 - 2026 Intel Corporation */ #ifndef MAC80211_H @@ -706,6 +706,7 @@ struct ieee80211_parsed_tpe { * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @epcs_support: does this BSS support EPCS + * @uhr_support: does this BSS support UHR * @csa_active: marks whether a channel switch is going on. * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL @@ -832,6 +833,8 @@ struct ieee80211_bss_conf { u8 pwr_reduction; bool eht_support; bool epcs_support; + bool uhr_support; + bool csa_active; bool mu_mimo_owner; @@ -1529,6 +1532,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * known the frame shouldn't be reported. * @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by * hardware or driver) + * @RX_FLAG_RADIOTAP_VHT: VHT radiotap data is present */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1564,6 +1568,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_LSIG = BIT(28), RX_FLAG_NO_PSDU = BIT(29), RX_FLAG_8023 = BIT(30), + RX_FLAG_RADIOTAP_VHT = BIT(31), }; /** @@ -1596,6 +1601,7 @@ enum mac80211_rx_encoding { RX_ENC_VHT, RX_ENC_HE, RX_ENC_EHT, + RX_ENC_UHR, }; /** @@ -1629,7 +1635,7 @@ enum mac80211_rx_encoding { * @antenna: antenna used * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) - * @nss: number of streams (VHT, HE and EHT only) + * @nss: number of streams (VHT, HE, EHT and UHR only) * @flag: %RX_FLAG_\* * @encoding: &enum mac80211_rx_encoding * @bw: &enum rate_info_bw @@ -1640,6 +1646,11 @@ enum mac80211_rx_encoding { * @eht: EHT specific rate information * @eht.ru: EHT RU, from &enum nl80211_eht_ru_alloc * @eht.gi: EHT GI, from &enum nl80211_eht_gi + * @uhr: UHR specific rate information + * @uhr.ru: UHR RU, from &enum nl80211_eht_ru_alloc + * @uhr.gi: UHR GI, from &enum nl80211_eht_gi + * @uhr.elr: UHR ELR MCS was used + * @uhr.im: UHR interference mitigation was used * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1671,6 +1682,12 @@ struct ieee80211_rx_status { u8 ru:4; u8 gi:2; } eht; + struct { + u8 ru:4; + u8 gi:2; + u8 elr:1; + u8 im:1; + } uhr; }; u8 rate_idx; u8 nss; @@ -1901,6 +1918,31 @@ enum ieee80211_offload_flags { }; /** + * struct ieee80211_eml_params - EHT Operating mode notification parameters + * + * EML Operating mode notification parameters received in the Operating mode + * notification frame. This struct is used as a container to pass the info to + * the underlay driver. + * + * @link_id: the link ID where the Operating mode notification frame has been + * received. + * @control: EML control field defined in P802.11be section 9.4.1.76. + * @link_bitmap: eMLSR/eMLMR enabled links defined in P802.11be + * section 9.4.1.76. + * @emlmr_mcs_map_count: eMLMR number of valid mcs_map_bw fields according to + * P802.11be section 9.4.1.76 (valid if eMLMR mode control bit is set). + * @emlmr_mcs_map_bw: eMLMR supported MCS and NSS set subfileds defined in + * P802.11be section 9.4.1.76 (valid if eMLMR mode control bit is set). + */ +struct ieee80211_eml_params { + u8 link_id; + u8 control; + u16 link_bitmap; + u8 emlmr_mcs_map_count; + u8 emlmr_mcs_map_bw[9]; +}; + +/** * struct ieee80211_vif_cfg - interface configuration * @assoc: association status * @ibss_joined: indicates whether this station is part of an IBSS or not @@ -2432,6 +2474,7 @@ struct ieee80211_sta_aggregates { * @he_cap: HE capabilities of this STA * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities * @eht_cap: EHT capabilities of this STA + * @uhr_cap: UHR capabilities of this STA * @s1g_cap: S1G capabilities of this STA * @agg: per-link data for multi-link aggregation * @bandwidth: current bandwidth the station can receive with @@ -2455,6 +2498,7 @@ struct ieee80211_link_sta { struct ieee80211_sta_he_cap he_cap; struct ieee80211_he_6ghz_capa he_6ghz_capa; struct ieee80211_sta_eht_cap eht_cap; + struct ieee80211_sta_uhr_cap uhr_cap; struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_sta_aggregates agg; @@ -2518,6 +2562,7 @@ struct ieee80211_link_sta { * by the AP. * @valid_links: bitmap of valid links, or 0 for non-MLO * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. + * @epp_peer: indicates that the peer is an EPP peer. */ struct ieee80211_sta { u8 addr[ETH_ALEN] __aligned(2); @@ -2542,6 +2587,7 @@ struct ieee80211_sta { struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; u16 valid_links; + bool epp_peer; struct ieee80211_link_sta deflink; struct ieee80211_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; @@ -4509,6 +4555,9 @@ struct ieee80211_prep_tx_info { * interface with the specified type would be added, and thus drivers that * implement this callback need to handle such cases. The type is the full * &enum nl80211_iftype. + * @set_eml_op_mode: Configure eMLSR/eMLMR operation mode in the underlay + * driver according to the parameter received in the EML Operating mode + * notification frame. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4904,6 +4953,10 @@ struct ieee80211_ops { struct ieee80211_neg_ttlm *ttlm); void (*prep_add_interface)(struct ieee80211_hw *hw, enum nl80211_iftype type); + int (*set_eml_op_mode)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_eml_params *eml_params); }; /** @@ -6270,6 +6323,30 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, struct ieee80211_vif *vif), void *data); +struct ieee80211_vif * +__ieee80211_iterate_interfaces(struct ieee80211_hw *hw, + struct ieee80211_vif *prev, + u32 iter_flags); + +/** + * for_each_interface - iterate interfaces under wiphy mutex + * @vif: the iterator variable + * @hw: the HW to iterate for + * @flags: the iteration flags, see &enum ieee80211_interface_iteration_flags + */ +#define for_each_interface(vif, hw, flags) \ + for (vif = __ieee80211_iterate_interfaces(hw, NULL, flags); \ + vif; \ + vif = __ieee80211_iterate_interfaces(hw, vif, flags)) + +/** + * for_each_active_interface - iterate active interfaces under wiphy mutex + * @vif: the iterator variable + * @hw: the HW to iterate for + */ +#define for_each_active_interface(vif, hw) \ + for_each_interface(vif, hw, IEEE80211_IFACE_ITER_ACTIVE) + /** * ieee80211_iterate_active_interfaces_mtx - iterate active interfaces * @@ -6282,12 +6359,18 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * @iterator: the iterator function to call, cannot sleep * @data: first argument of the iterator function */ -void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, - u32 iter_flags, - void (*iterator)(void *data, - u8 *mac, - struct ieee80211_vif *vif), - void *data); +static inline void +ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, + u32 iter_flags, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) +{ + struct ieee80211_vif *vif; + + for_each_interface(vif, hw, iter_flags | IEEE80211_IFACE_ITER_ACTIVE) + iterator(data, vif->addr, vif); +} /** * ieee80211_iterate_stations_atomic - iterate stations @@ -6306,6 +6389,20 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, struct ieee80211_sta *sta), void *data); +struct ieee80211_sta * +__ieee80211_iterate_stations(struct ieee80211_hw *hw, + struct ieee80211_sta *prev); + +/** + * for_each_station - iterate stations under wiphy mutex + * @sta: the iterator variable + * @hw: the HW to iterate for + */ +#define for_each_station(sta, hw) \ + for (sta = __ieee80211_iterate_stations(hw, NULL); \ + sta; \ + sta = __ieee80211_iterate_stations(hw, sta)) + /** * ieee80211_iterate_stations_mtx - iterate stations * @@ -6318,10 +6415,17 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw, * @iterator: the iterator function to call * @data: first argument of the iterator function */ -void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, - void (*iterator)(void *data, - struct ieee80211_sta *sta), - void *data); +static inline void +ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data) +{ + struct ieee80211_sta *sta; + + for_each_station(sta, hw) + iterator(data, sta); +} /** * ieee80211_queue_work - add work onto the mac80211 workqueue @@ -7221,7 +7325,7 @@ ieee80211_get_he_6ghz_capa_vif(const struct ieee80211_supported_band *sband, } /** - * ieee80211_get_eht_iftype_cap_vif - return ETH capabilities for sband/vif + * ieee80211_get_eht_iftype_cap_vif - return EHT capabilities for sband/vif * @sband: the sband to search for the iftype on * @vif: the vif to get the iftype from * @@ -7235,6 +7339,20 @@ ieee80211_get_eht_iftype_cap_vif(const struct ieee80211_supported_band *sband, } /** + * ieee80211_get_uhr_iftype_cap_vif - return UHR capabilities for sband/vif + * @sband: the sband to search for the iftype on + * @vif: the vif to get the iftype from + * + * Return: pointer to the struct ieee80211_sta_uhr_cap, or %NULL is none found + */ +static inline const struct ieee80211_sta_uhr_cap * +ieee80211_get_uhr_iftype_cap_vif(const struct ieee80211_supported_band *sband, + struct ieee80211_vif *vif) +{ + return ieee80211_get_uhr_iftype_cap(sband, ieee80211_vif_type_p2p(vif)); +} + +/** * ieee80211_update_mu_groups - set the VHT MU-MIMO groud data * * @vif: the specified virtual interface @@ -7289,7 +7407,9 @@ void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, * @band: the band to transmit on * @sta: optional pointer to get the station to send the frame to * - * Return: %true if the skb was prepared, %false otherwise + * Return: %true if the skb was prepared, %false otherwise. + * On failure, the skb is freed by this function; callers must not + * free it again. * * Note: must be called under RCU lock */ diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 57df78cfbf82..766f4fb25e26 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -35,6 +35,8 @@ enum gdma_request_type { GDMA_CREATE_MR = 31, GDMA_DESTROY_MR = 32, GDMA_QUERY_HWC_TIMEOUT = 84, /* 0x54 */ + GDMA_ALLOC_DM = 96, /* 0x60 */ + GDMA_DESTROY_DM = 97, /* 0x61 */ }; #define GDMA_RESOURCE_DOORBELL_PAGE 27 @@ -382,6 +384,10 @@ struct gdma_irq_context { char name[MANA_IRQ_NAME_SZ]; }; +enum gdma_context_flags { + GC_PROBE_SUCCEEDED = 0, +}; + struct gdma_context { struct device *dev; struct dentry *mana_pci_debugfs; @@ -430,6 +436,8 @@ struct gdma_context { u64 pf_cap_flags1; struct workqueue_struct *service_wq; + + unsigned long flags; }; static inline bool mana_gd_is_mana(struct gdma_dev *gd) @@ -486,6 +494,8 @@ struct gdma_wqe { #define INLINE_OOB_SMALL_SIZE 8 #define INLINE_OOB_LARGE_SIZE 24 +#define MANA_MAX_TX_WQE_SGL_ENTRIES 30 + #define MAX_TX_WQE_SIZE 512 #define MAX_RX_WQE_SIZE 256 @@ -591,6 +601,20 @@ enum { /* Driver can self reset on FPGA Reconfig EQE notification */ #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) +/* Driver detects stalled send queues and recovers them */ +#define GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY BIT(18) + +#define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6) + +/* Driver supports linearizing the skb when num_sge exceeds hardware limit */ +#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20) + +/* Driver can send HWC periodically to query stats */ +#define GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY BIT(21) + +/* Driver can handle hardware recovery events during probe */ +#define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ @@ -599,7 +623,12 @@ enum { GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \ GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \ - GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE) + GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \ + GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \ + GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \ + GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \ + GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \ + GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY) #define GDMA_DRV_CAP_FLAGS2 0 @@ -839,6 +868,8 @@ enum gdma_mr_type { GDMA_MR_TYPE_GVA = 2, /* Guest zero-based address MRs */ GDMA_MR_TYPE_ZBVA = 4, + /* Device address MRs */ + GDMA_MR_TYPE_DM = 5, }; struct gdma_create_mr_params { @@ -854,6 +885,12 @@ struct gdma_create_mr_params { u64 dma_region_handle; enum gdma_mr_access_flags access_flags; } zbva; + struct { + u64 dm_handle; + u64 offset; + u64 length; + enum gdma_mr_access_flags access_flags; + } da; }; }; @@ -868,13 +905,23 @@ struct gdma_create_mr_request { u64 dma_region_handle; u64 virtual_address; enum gdma_mr_access_flags access_flags; - } gva; + } __packed gva; struct { u64 dma_region_handle; enum gdma_mr_access_flags access_flags; - } zbva; - }; + } __packed zbva; + struct { + u64 dm_handle; + u64 offset; + enum gdma_mr_access_flags access_flags; + } __packed da; + } __packed; u32 reserved_2; + union { + struct { + u64 length; + } da_ext; + }; };/* HW DATA */ struct gdma_create_mr_response { @@ -893,6 +940,27 @@ struct gdma_destroy_mr_response { struct gdma_resp_hdr hdr; };/* HW DATA */ +struct gdma_alloc_dm_req { + struct gdma_req_hdr hdr; + u64 length; + u32 alignment; + u32 flags; +}; /* HW Data */ + +struct gdma_alloc_dm_resp { + struct gdma_resp_hdr hdr; + u64 dm_handle; +}; /* HW Data */ + +struct gdma_destroy_dm_req { + struct gdma_req_hdr hdr; + u64 dm_handle; +}; /* HW Data */ + +struct gdma_destroy_dm_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + int mana_gd_verify_vf_version(struct pci_dev *pdev); int mana_gd_register_device(struct gdma_dev *gd); diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h index 83cf93338eb3..16feb39616c1 100644 --- a/include/net/mana/hw_channel.h +++ b/include/net/mana/hw_channel.h @@ -24,6 +24,8 @@ #define HWC_INIT_DATA_PF_DEST_CQ_ID 11 #define HWC_DATA_CFG_HWC_TIMEOUT 1 +#define HWC_DATA_HW_LINK_CONNECT 2 +#define HWC_DATA_HW_LINK_DISCONNECT 3 #define HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS 30000 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 0921485565c0..a078af283bdd 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -375,6 +375,14 @@ struct mana_tx_qp { struct mana_ethtool_stats { u64 stop_queue; u64 wake_queue; + u64 tx_cqe_err; + u64 tx_cqe_unknown_type; + u64 tx_linear_pkt_cnt; + u64 rx_coalesced_err; + u64 rx_cqe_unknown_type; +}; + +struct mana_ethtool_hc_stats { u64 hc_rx_discards_no_wqe; u64 hc_rx_err_vport_disabled; u64 hc_rx_bytes; @@ -402,10 +410,6 @@ struct mana_ethtool_stats { u64 hc_tx_mcast_pkts; u64 hc_tx_mcast_bytes; u64 hc_tx_err_gdma; - u64 tx_cqe_err; - u64 tx_cqe_unknown_type; - u64 rx_coalesced_err; - u64 rx_cqe_unknown_type; }; struct mana_ethtool_phy_stats { @@ -473,15 +477,25 @@ struct mana_context { u16 num_ports; u8 bm_hostmode; + struct mana_ethtool_hc_stats hc_stats; struct mana_eq *eqs; struct dentry *mana_eqs_debugfs; + struct workqueue_struct *per_port_queue_reset_wq; + /* Workqueue for querying hardware stats */ + struct delayed_work gf_stats_work; + bool hwc_timeout_occurred; struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; + + /* Link state change work */ + struct work_struct link_change_work; + u32 link_event; }; struct mana_port_context { struct mana_context *ac; struct net_device *ndev; + struct work_struct queue_reset_work; u8 mac_addr[ETH_ALEN]; @@ -573,13 +587,14 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); -void mana_query_gf_stats(struct mana_port_context *apc); +int mana_query_gf_stats(struct mana_context *ac); int mana_query_link_cfg(struct mana_port_context *apc); int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, int enable_clamping); void mana_query_phy_stats(struct mana_port_context *apc); int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); +void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc); extern const struct ethtool_ops mana_ethtool_ops; extern struct dentry *mana_debugfs_root; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4a30bd458c5a..2dfee6d4258a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -92,15 +92,17 @@ struct neigh_parms { static inline void neigh_var_set(struct neigh_parms *p, int index, int val) { set_bit(index, p->data_state); - p->data[index] = val; + WRITE_ONCE(p->data[index], val); } -#define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) +#define __NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) +#define NEIGH_VAR(p, attr) READ_ONCE(__NEIGH_VAR(p, attr)) +#define NEIGH_VAR_PTR(p, attr) (&(__NEIGH_VAR(p, attr))) /* In ndo_neigh_setup, NEIGH_VAR_INIT should be used. * In other cases, NEIGH_VAR_SET should be used. */ -#define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val) +#define NEIGH_VAR_INIT(p, attr, val) (__NEIGH_VAR(p, attr) = val) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) static inline void neigh_parms_data_state_setall(struct neigh_parms *p) @@ -236,7 +238,7 @@ struct neigh_table { atomic_t gc_entries; struct list_head gc_list; struct list_head managed_list; - rwlock_t lock; + spinlock_t lock; unsigned long last_rand; struct neigh_statistics __percpu *stats; struct neigh_hash_table __rcu *nht; @@ -378,6 +380,13 @@ struct net *neigh_parms_net(const struct neigh_parms *parms) unsigned long neigh_rand_reach_time(unsigned long base); +static inline void neigh_set_reach_time(struct neigh_parms *p) +{ + unsigned long base = NEIGH_VAR(p, BASE_REACHABLE_TIME); + + WRITE_ONCE(p->reachable_time, neigh_rand_reach_time(base)); +} + void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cb664f6e3558..d7bec49ee9ea 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -37,6 +37,7 @@ #include <net/netns/smc.h> #include <net/netns/bpf.h> #include <net/netns/mctp.h> +#include <net/netns/vsock.h> #include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> @@ -120,6 +121,7 @@ struct net { * it is critical that it is on a read_mostly cache line. */ u32 hash_mix; + bool is_dying; struct net_device *loopback_dev; /* The loopback */ @@ -196,6 +198,9 @@ struct net { /* Move to a better place when the config guard is removed. */ struct mutex rtnl_mutex; #endif +#if IS_ENABLED(CONFIG_VSOCKETS) + struct netns_vsock vsock; +#endif } __randomize_layout; #include <linux/seq_file_net.h> diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index cd00e0406cf4..95ed28212f4e 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -14,6 +14,10 @@ struct netdev_config { u8 hds_config; }; +struct netdev_queue_config { + u32 rx_page_size; +}; + /* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; @@ -111,6 +115,11 @@ void netdev_stat_queue_sum(struct net_device *netdev, int tx_start, int tx_end, struct netdev_queue_stats_tx *tx_sum); +enum { + /* The queue checks and honours the page size qcfg parameter */ + QCFG_RX_PAGE_SIZE = 0x1, +}; + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * @@ -130,27 +139,52 @@ void netdev_stat_queue_sum(struct net_device *netdev, * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used * for this queue. Return NULL on error. * + * @ndo_default_qcfg: (Optional) Populate queue config struct with defaults. + * Queue config structs are passed to this helper before + * the user-requested settings are applied. + * + * @ndo_validate_qcfg: (Optional) Check if queue config is supported. + * Called when configuration affecting a queue may be + * changing, either due to NIC-wide config, or config + * scoped to the queue at a specified index. + * When NIC-wide config is changed the callback will + * be invoked for all queues. + * + * @supported_params: Bitmask of supported parameters, see QCFG_*. + * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only * be called for an interface which is open. */ struct netdev_queue_mgmt_ops { - size_t ndo_queue_mem_size; - int (*ndo_queue_mem_alloc)(struct net_device *dev, - void *per_queue_mem, - int idx); - void (*ndo_queue_mem_free)(struct net_device *dev, - void *per_queue_mem); - int (*ndo_queue_start)(struct net_device *dev, - void *per_queue_mem, - int idx); - int (*ndo_queue_stop)(struct net_device *dev, - void *per_queue_mem, - int idx); - struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, - int idx); + size_t ndo_queue_mem_size; + int (*ndo_queue_mem_alloc)(struct net_device *dev, + struct netdev_queue_config *qcfg, + void *per_queue_mem, + int idx); + void (*ndo_queue_mem_free)(struct net_device *dev, + void *per_queue_mem); + int (*ndo_queue_start)(struct net_device *dev, + struct netdev_queue_config *qcfg, + void *per_queue_mem, + int idx); + int (*ndo_queue_stop)(struct net_device *dev, + void *per_queue_mem, + int idx); + void (*ndo_default_qcfg)(struct net_device *dev, + struct netdev_queue_config *qcfg); + int (*ndo_validate_qcfg)(struct net_device *dev, + struct netdev_queue_config *qcfg, + struct netlink_ext_ack *extack); + struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, + int idx); + + unsigned int supported_params; }; +void netdev_queue_config(struct net_device *dev, int rxq, + struct netdev_queue_config *qcfg); + bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); /** @@ -310,6 +344,17 @@ static inline void netif_subqueue_sent(const struct net_device *dev, netdev_tx_sent_queue(txq, bytes); } +static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq) +{ + unsigned long trans_start = READ_ONCE(txq->trans_start); + + if (netif_xmit_stopped(txq) && + time_after(jiffies, trans_start + txq->dev->watchdog_timeo)) + return jiffies_to_msecs(jiffies - trans_start); + + return 0; +} + #define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ ({ \ struct netdev_queue *_txq; \ diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 8cdcd138b33f..cfa72c485387 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -7,6 +7,7 @@ #include <linux/sysfs.h> #include <net/xdp.h> #include <net/page_pool/types.h> +#include <net/netdev_queues.h> /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { @@ -27,6 +28,7 @@ struct netdev_rx_queue { struct xsk_buff_pool *pool; #endif struct napi_struct *napi; + struct netdev_queue_config qcfg; struct pp_memory_provider_params mp_params; } ____cacheline_aligned_in_smp; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index aa0a7c82199e..bc42dd0e10e6 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -16,6 +16,7 @@ #include <linux/bitops.h> #include <linux/compiler.h> +#include <net/netns/generic.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_sctp.h> diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 1b58b5b91ff6..cf0166520cf3 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -13,20 +13,20 @@ struct nf_conncount_list { u32 last_gc; /* jiffies at most recent gc */ struct list_head head; /* connections with the same filtering key */ unsigned int count; /* length of list */ + unsigned int last_gc_count; /* length of list at most recent gc */ }; struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen); void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data); -unsigned int nf_conncount_count(struct net *net, - struct nf_conncount_data *data, - const u32 *key, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); +unsigned int nf_conncount_count_skb(struct net *net, + const struct sk_buff *skb, + u16 l3num, + struct nf_conncount_data *data, + const u32 *key); -int nf_conncount_add(struct net *net, struct nf_conncount_list *list, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone); +int nf_conncount_add_skb(struct net *net, const struct sk_buff *skb, + u16 l3num, struct nf_conncount_list *list); void nf_conncount_list_init(struct nf_conncount_list *list); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 6929f8daf1ed..cd5020835a6d 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -30,7 +30,7 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* convert protoinfo to nfnetink attributes */ + /* convert protoinfo to nfnetlink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct, bool destroy); diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index f7dd950ff250..4d55b7325707 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -11,7 +11,7 @@ #ifndef _NF_CONNTRACK_TUPLE_H #define _NF_CONNTRACK_TUPLE_H -#include <linux/netfilter/x_tables.h> +#include <linux/netfilter.h> #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <linux/list_nulls.h> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index c003cd194fa2..b09c11c048d5 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -107,6 +107,19 @@ enum flow_offload_xmit_type { #define NF_FLOW_TABLE_ENCAP_MAX 2 +struct flow_offload_tunnel { + union { + struct in_addr src_v4; + struct in6_addr src_v6; + }; + union { + struct in_addr dst_v4; + struct in6_addr dst_v6; + }; + + u8 l3_proto; +}; + struct flow_offload_tuple { union { struct in_addr src_v4; @@ -130,22 +143,25 @@ struct flow_offload_tuple { __be16 proto; } encap[NF_FLOW_TABLE_ENCAP_MAX]; + struct flow_offload_tunnel tun; + /* All members above are keys for lookups, see flow_offload_hash(). */ struct { } __hash; u8 dir:2, xmit_type:3, encap_num:2, + tun_num:2, in_vlan_ingress:2; u16 mtu; union { struct { struct dst_entry *dst_cache; + u32 ifidx; u32 dst_cookie; }; struct { u32 ifidx; - u32 hw_ifidx; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; } out; @@ -206,7 +222,9 @@ struct nf_flow_route { u16 id; __be16 proto; } encap[NF_FLOW_TABLE_ENCAP_MAX]; + struct flow_offload_tunnel tun; u8 num_encaps:2, + num_tuns:2, ingress_vlans:2; } in; struct { @@ -222,6 +240,12 @@ struct nf_flow_route { struct flow_offload *flow_offload_alloc(struct nf_conn *ct); void flow_offload_free(struct flow_offload *flow); +struct nft_flowtable; +struct nft_pktinfo; +int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct, + struct nf_flow_route *route, enum ip_conntrack_dir dir, + struct nft_flowtable *ft); + static inline int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, flow_setup_cb_t *cb, void *cb_priv) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 4aeffddb7586..45eb26b2e95b 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -6,11 +6,13 @@ #include <linux/ipv6.h> #include <linux/jhash.h> #include <linux/netfilter.h> +#include <linux/rhashtable-types.h> #include <linux/skbuff.h> /* Each queued (to userspace) skbuff has one of these. */ struct nf_queue_entry { struct list_head list; + struct rhash_head hash_node; struct sk_buff *skb; unsigned int id; unsigned int hook_index; /* index in hook_entries->hook[] */ @@ -19,7 +21,9 @@ struct nf_queue_entry { struct net_device *physout; #endif struct nf_hook_state state; + bool nf_ct_is_unconfirmed; u16 size; /* sizeof(entry) + saved route keys */ + u16 queue_num; /* extra space to store route keys */ }; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fab7dc73f738..ec8a8ec9c0aa 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -6,7 +6,6 @@ #include <linux/list.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> #include <linux/rhashtable.h> @@ -317,11 +316,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state + * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element */ enum nft_iter_type { NFT_ITER_UNSPEC, NFT_ITER_READ, NFT_ITER_UPDATE, + NFT_ITER_UPDATE_CLONE, }; struct nft_set; @@ -452,6 +453,7 @@ struct nft_set_ext; * @init: initialize private data of new set instance * @destroy: destroy private data of set instance * @gc_init: initialize garbage collection + * @abort_skip_removal: skip removal of elements from abort path * @elemsize: element private size * * Operations lookup, update and delete have simpler interfaces, are faster @@ -509,6 +511,7 @@ struct nft_set_ops { const struct nft_set *set); void (*gc_init)(const struct nft_set *set); + bool abort_skip_removal; unsigned int elemsize; }; @@ -871,6 +874,8 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); +void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, + struct nft_set_elem_expr *elem_expr); void nft_set_elem_destroy(const struct nft_set *set, const struct nft_elem_priv *elem_priv, bool destroy_expr); @@ -1091,6 +1096,29 @@ struct nft_rule_blob { __attribute__((aligned(__alignof__(struct nft_rule_dp)))); }; +enum nft_chain_types { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + +/** + * struct nft_chain_validate_state - validation state + * + * If a chain is encountered again during table validation it is + * possible to avoid revalidation provided the calling context is + * compatible. This structure stores relevant calling context of + * previous validations. + * + * @hook_mask: the hook numbers and locations the chain is linked to + * @depth: the deepest call chain level the chain is linked to + */ +struct nft_chain_validate_state { + u8 hook_mask[NFT_CHAIN_T_MAX]; + u8 depth; +}; + /** * struct nft_chain - nf_tables chain * @@ -1109,6 +1137,7 @@ struct nft_rule_blob { * @udlen: user data length * @udata: user data in the chain * @blob_next: rule blob pointer to the next in the chain + * @vstate: validation state */ struct nft_chain { struct nft_rule_blob __rcu *blob_gen_0; @@ -1128,9 +1157,10 @@ struct nft_chain { /* Only used during control plane commit phase: */ struct nft_rule_blob *blob_next; + struct nft_chain_validate_state vstate; }; -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv); @@ -1138,13 +1168,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -enum nft_chain_types { - NFT_CHAIN_T_DEFAULT = 0, - NFT_CHAIN_T_ROUTE, - NFT_CHAIN_T_NAT, - NFT_CHAIN_T_MAX -}; - /** * struct nft_chain_type - nf_tables chain type info * @@ -1838,6 +1861,11 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline int nft_trans_gc_space(const struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + static inline void nft_ctx_update(struct nft_ctx *ctx, const struct nft_trans *trans) { diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index a0633eeaec97..c53ac00bb974 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -42,7 +42,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) if (ip6h->version != 6) return -1; - pkt_len = ntohs(ip6h->payload_len); + pkt_len = ipv6_payload_len(pkt->skb, ip6h); skb_len = pkt->skb->len - skb_network_offset(pkt->skb); if (pkt_len + sizeof(*ip6h) > skb_len) return -1; @@ -86,7 +86,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) if (ip6h->version != 6) goto inhdr_error; - pkt_len = ntohs(ip6h->payload_len); + pkt_len = ipv6_payload_len(pkt->skb, ip6h); if (pkt_len + sizeof(*ip6h) > pkt->skb->len) { idev = __in6_dev_get(nft_in(pkt)); __IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS); diff --git a/include/net/netmem.h b/include/net/netmem.h index f7dacc9e75fd..a96b3e5e5574 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -68,10 +68,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers); enum net_iov_type { NET_IOV_DMABUF, NET_IOV_IOURING, - - /* Force size to unsigned long to make the NET_IOV_ASSERTS below pass. - */ - NET_IOV_MAX = ULONG_MAX }; /* A memory descriptor representing abstract networking I/O vectors, @@ -247,6 +243,23 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem) return page_to_pfn(netmem_to_page(netmem)); } +/* XXX: How to extract netmem_desc from page must be changed, once + * netmem_desc no longer overlays on page and will be allocated through + * slab. + */ +#define __pp_page_to_nmdesc(p) (_Generic((p), \ + const struct page * : (const struct netmem_desc *)(p), \ + struct page * : (struct netmem_desc *)(p))) + +/* CAUTION: Check if the page is a pp page before calling this helper or + * know it's a pp page. + */ +#define pp_page_to_nmdesc(p) \ +({ \ + DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \ + __pp_page_to_nmdesc(p); \ +}) + /** * __netmem_to_nmdesc - unsafely get pointer to the &netmem_desc backing * @netmem @@ -265,42 +278,25 @@ static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem) return (__force struct netmem_desc *)netmem; } -/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to - * common fields. - * @netmem: netmem reference to extract as net_iov. - * - * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic, - * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access - * these fields without a type check to make sure that the underlying mem is - * net_iov or page. +/* netmem_to_nmdesc - convert netmem_ref to struct netmem_desc * for + * access to common fields. + * @netmem: netmem reference to get netmem_desc. * - * The resulting value of this function can only be used to access the fields - * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in - * undefined behavior. + * All the sub types of netmem_ref (netmem_desc, net_iov) have the same + * pp, pp_magic, dma_addr, and pp_ref_count fields via netmem_desc. * - * Return: the netmem_ref cast to net_iov* regardless of its underlying type. + * Return: the pointer to struct netmem_desc * regardless of its + * underlying type. */ -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem) { - return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); -} + void *p = (void *)((__force unsigned long)netmem & ~NET_IOV); -/* XXX: How to extract netmem_desc from page must be changed, once - * netmem_desc no longer overlays on page and will be allocated through - * slab. - */ -#define __pp_page_to_nmdesc(p) (_Generic((p), \ - const struct page * : (const struct netmem_desc *)(p), \ - struct page * : (struct netmem_desc *)(p))) + if (netmem_is_net_iov(netmem)) + return &((struct net_iov *)p)->desc; -/* CAUTION: Check if the page is a pp page before calling this helper or - * know it's a pp page. - */ -#define pp_page_to_nmdesc(p) \ -({ \ - DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \ - __pp_page_to_nmdesc(p); \ -}) + return __pp_page_to_nmdesc((struct page *)p); +} /** * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem @@ -320,12 +316,12 @@ static inline struct page_pool *__netmem_get_pp(netmem_ref netmem) static inline struct page_pool *netmem_get_pp(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->pp; + return netmem_to_nmdesc(netmem)->pp; } static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem) { - return &__netmem_clear_lsb(netmem)->pp_ref_count; + return &netmem_to_nmdesc(netmem)->pp_ref_count; } static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid) @@ -390,11 +386,39 @@ static inline bool netmem_is_pfmemalloc(netmem_ref netmem) static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->dma_addr; + return netmem_to_nmdesc(netmem)->dma_addr; } -void get_netmem(netmem_ref netmem); -void put_netmem(netmem_ref netmem); +#if defined(CONFIG_NET_DEVMEM) +static inline bool net_is_devmem_iov(const struct net_iov *niov) +{ + return niov->type == NET_IOV_DMABUF; +} +#else +static inline bool net_is_devmem_iov(const struct net_iov *niov) +{ + return false; +} +#endif + +void __get_netmem(netmem_ref netmem); +void __put_netmem(netmem_ref netmem); + +static __always_inline void get_netmem(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + __get_netmem(netmem); + else + get_page(netmem_to_page(netmem)); +} + +static __always_inline void put_netmem(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + __put_netmem(netmem); + else + put_page(netmem_to_page(netmem)); +} #define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \ do { \ diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 9b36f0ff0c20..9ef3d70e5e9c 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -13,9 +13,11 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + int sysctl_txq_reselection; int sysctl_optmem_max; u8 sysctl_txrehash; u8 sysctl_tstamp_allow_data; + u8 sysctl_bypass_prot_mem; #ifdef CONFIG_PROC_FS struct prot_inuse __percpu *prot_inuse; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 34eb3aecb3f2..8e971c7bf164 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -74,19 +74,26 @@ struct netns_ipv4 { /* TXRX readonly hotpath cache lines */ __cacheline_group_begin(netns_ipv4_read_txrx); - u8 sysctl_tcp_moderate_rcvbuf; __cacheline_group_end(netns_ipv4_read_txrx); /* RX readonly hotpath cache line */ __cacheline_group_begin(netns_ipv4_read_rx); + u8 sysctl_tcp_moderate_rcvbuf; u8 sysctl_ip_early_demux; u8 sysctl_tcp_early_demux; u8 sysctl_tcp_l3mdev_accept; /* 3 bytes hole, try to pack */ int sysctl_tcp_reordering; int sysctl_tcp_rmem[3]; + int sysctl_tcp_rcvbuf_low_rtt; __cacheline_group_end(netns_ipv4_read_rx); + /* ICMP rate limiter hot cache line. */ + __cacheline_group_begin_aligned(icmp); + atomic_t icmp_global_credit; + u32 icmp_global_stamp; + __cacheline_group_end_aligned(icmp); + struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; @@ -135,12 +142,12 @@ struct netns_ipv4 { u8 sysctl_icmp_echo_ignore_broadcasts; u8 sysctl_icmp_ignore_bogus_error_responses; u8 sysctl_icmp_errors_use_inbound_ifaddr; + u8 sysctl_icmp_errors_extension_mask; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_msgs_per_sec; int sysctl_icmp_msgs_burst; - atomic_t icmp_global_credit; - u32 icmp_global_stamp; + u32 ip_rt_min_pmtu; int ip_rt_mtu_expires; int ip_rt_min_advmss; @@ -220,6 +227,7 @@ struct netns_ipv4 { int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; unsigned int sysctl_tcp_child_ehash_entries; + int sysctl_tcp_comp_sack_rtt_percent; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns; int sysctl_max_syn_backlog; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 47dc70d8100a..34bdb1308e8f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -30,19 +30,23 @@ struct netns_sysctl_ipv6 { int ip6_rt_min_advmss; u32 multipath_hash_fields; u8 multipath_hash_policy; - u8 bindv6only; + + __cacheline_group_begin(sysctl_ipv6_flowlabel); u8 flowlabel_consistency; u8 auto_flowlabels; - int icmpv6_time; + u8 flowlabel_state_ranges; + __cacheline_group_end(sysctl_ipv6_flowlabel); + u8 icmpv6_echo_ignore_all; u8 icmpv6_echo_ignore_multicast; u8 icmpv6_echo_ignore_anycast; + int icmpv6_time; DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1); unsigned long *icmpv6_ratemask_ptr; u8 anycast_src_echo_reply; + u8 bindv6only; u8 ip_nonlocal_bind; u8 fwmark_reflect; - u8 flowlabel_state_ranges; int idgen_retries; int idgen_delay; int flowlabel_reflect; @@ -56,6 +60,7 @@ struct netns_sysctl_ipv6 { u8 skip_notify_on_dev_down; u8 fib_notify_on_flag_change; u8 icmpv6_error_anycast_as_unicast; + u8 icmpv6_errors_extension_mask; }; struct netns_ipv6 { diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 19ad2574b267..6682e51513ef 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -16,6 +16,7 @@ struct netns_mpls { int default_ttl; size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; + struct mutex platform_mutex; struct ctl_table_header *ctl; }; diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index fc752a50f91b..ed24c9f638ee 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -17,6 +17,9 @@ struct netns_smc { #ifdef CONFIG_SYSCTL struct ctl_table_header *smc_hdr; #endif +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + struct smc_hs_ctrl __rcu *hs_ctrl; +#endif /* CONFIG_SMC_HS_CTRL_BPF */ unsigned int sysctl_autocorking_size; unsigned int sysctl_smcr_buf_type; int sysctl_smcr_testlink_time; @@ -24,5 +27,7 @@ struct netns_smc { int sysctl_rmem; int sysctl_max_links_per_lgr; int sysctl_max_conns_per_lgr; + unsigned int sysctl_smcr_max_send_wr; + unsigned int sysctl_smcr_max_recv_wr; }; #endif diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h new file mode 100644 index 000000000000..dc8cbe45f406 --- /dev/null +++ b/include/net/netns/vsock.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_NET_NAMESPACE_VSOCK_H +#define __NET_NET_NAMESPACE_VSOCK_H + +#include <linux/types.h> + +enum vsock_net_mode { + VSOCK_NET_MODE_GLOBAL, + VSOCK_NET_MODE_LOCAL, +}; + +struct netns_vsock { + struct ctl_table_header *sysctl_hdr; + + /* protected by the vsock_table_lock in af_vsock.c */ + u32 port; + + enum vsock_net_mode mode; + enum vsock_net_mode child_ns_mode; + + /* 0 = unlocked, 1 = locked to global, 2 = locked to local */ + int child_ns_mode_locked; +}; +#endif /* __NET_NET_NAMESPACE_VSOCK_H */ diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 127e6c7d910d..c54df042db6b 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -219,6 +219,8 @@ static inline void nfc_free_device(struct nfc_dev *dev) int nfc_register_device(struct nfc_dev *dev); +void nfc_unregister_rfkill(struct nfc_dev *dev); +void nfc_remove_device(struct nfc_dev *dev); void nfc_unregister_device(struct nfc_dev *dev); /** diff --git a/include/net/nl802154.h b/include/net/nl802154.h index a994dea74596..442822746e92 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -191,14 +191,12 @@ enum nl802154_iftype { * @NL802154_CAP_ATTR_CHANNELS: a nested attribute for nl802154_channel_attr * @NL802154_CAP_ATTR_TX_POWERS: a nested attribute for * nl802154_wpan_phy_tx_power - * @NL802154_CAP_ATTR_MIN_CCA_ED_LEVEL: minimum value for cca_ed_level - * @NL802154_CAP_ATTR_MAX_CCA_ED_LEVEL: maximum value for cca_ed_level * @NL802154_CAP_ATTR_CCA_MODES: nl802154_cca_modes flags * @NL802154_CAP_ATTR_CCA_OPTS: nl802154_cca_opts flags * @NL802154_CAP_ATTR_MIN_MINBE: minimum of minbe value * @NL802154_CAP_ATTR_MAX_MINBE: maximum of minbe value * @NL802154_CAP_ATTR_MIN_MAXBE: minimum of maxbe value - * @NL802154_CAP_ATTR_MAX_MINBE: maximum of maxbe value + * @NL802154_CAP_ATTR_MAX_MAXBE: maximum of maxbe value * @NL802154_CAP_ATTR_MIN_CSMA_BACKOFFS: minimum of csma backoff value * @NL802154_CAP_ATTR_MAX_CSMA_BACKOFFS: maximum of csma backoffs value * @NL802154_CAP_ATTR_MIN_FRAME_RETRIES: minimum of frame retries value @@ -364,6 +362,7 @@ enum nl802154_cca_opts { NL802154_CCA_OPT_ENERGY_CARRIER_AND, NL802154_CCA_OPT_ENERGY_CARRIER_OR, + /* private: */ /* keep last */ __NL802154_CCA_OPT_ATTR_AFTER_LAST, NL802154_CCA_OPT_ATTR_MAX = __NL802154_CCA_OPT_ATTR_AFTER_LAST - 1 diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 1509a536cb85..cdd95477af7a 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -161,6 +161,7 @@ struct memory_provider_ops; struct pp_memory_provider_params { void *mp_priv; const struct memory_provider_ops *mp_ops; + u32 rx_page_size; }; struct page_pool { @@ -246,7 +247,7 @@ struct page_pool { /* User-facing fields, protected by page_pools_lock */ struct { struct hlist_node list; - u64 detach_time; + ktime_t detach_time; u32 id; } user; }; diff --git a/include/net/phy/realtek_phy.h b/include/net/phy/realtek_phy.h new file mode 100644 index 000000000000..d683bc1b0659 --- /dev/null +++ b/include/net/phy/realtek_phy.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _REALTEK_PHY_H +#define _REALTEK_PHY_H + +#define PHY_ID_RTL_DUMMY_SFP 0x001ccbff + +#endif /* _REALTEK_PHY_H */ diff --git a/include/net/ping.h b/include/net/ping.h index 9634b8800814..05bfd594a64c 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -58,7 +58,7 @@ void ping_unhash(struct sock *sk); int ping_init_sock(struct sock *sk); void ping_close(struct sock *sk, long timeout); -int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int ping_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 8a75c73fc555..18a419cd9d94 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -25,11 +25,6 @@ struct qdisc_walker { const struct Qdisc * : (const void *)&q->privdata, \ struct Qdisc * : (void *)&q->privdata) -static inline struct Qdisc *qdisc_from_priv(void *priv) -{ - return container_of(priv, struct Qdisc, privdata); -} - /* Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth @@ -48,7 +43,6 @@ static inline struct Qdisc *qdisc_from_priv(void *priv) */ typedef u64 psched_time_t; -typedef long psched_tdiff_t; /* Avoid doing 64 bit divide */ #define PSCHED_SHIFT 6 @@ -120,12 +114,13 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, void __qdisc_run(struct Qdisc *q); -static inline void qdisc_run(struct Qdisc *q) +static inline struct sk_buff *qdisc_run(struct Qdisc *q) { if (qdisc_run_begin(q)) { __qdisc_run(q); - qdisc_run_end(q); + return qdisc_run_end(q); } + return NULL; } extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -313,4 +308,28 @@ static inline unsigned int qdisc_peek_len(struct Qdisc *sch) return len; } +static inline void qdisc_lock_init(struct Qdisc *sch, + const struct Qdisc_ops *ops) +{ + spin_lock_init(&sch->q.lock); + + /* Skip dynamic keys if nesting is not possible */ + if (ops->static_flags & TCQ_F_INGRESS || + ops == &noqueue_qdisc_ops) + return; + + lockdep_register_key(&sch->root_lock_key); + lockdep_set_class(&sch->q.lock, &sch->root_lock_key); +} + +static inline void qdisc_lock_uninit(struct Qdisc *sch, + const struct Qdisc_ops *ops) +{ + if (ops->static_flags & TCQ_F_INGRESS || + ops == &noqueue_qdisc_ops) + return; + + lockdep_unregister_key(&sch->root_lock_key); +} + #endif diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index 8e91a8fa31b5..ad6d703ce6fe 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -35,6 +35,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) mem_cgroup_sk_under_memory_pressure(sk)) return true; + if (sk->sk_bypass_prot_mem) + return false; + return !!READ_ONCE(*sk->sk_prot->memory_pressure); } diff --git a/include/net/psp/types.h b/include/net/psp/types.h index 31cee64b7c86..25a9096d4e7d 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -59,6 +59,10 @@ struct psp_dev_config { * device key * @stale_assocs: associations which use a rotated out key * + * @stats: statistics maintained by the core + * @stats.rotations: See stats attr key-rotations + * @stats.stales: See stats attr stale-events + * * @rcu: RCU head for freeing the structure */ struct psp_dev { @@ -81,6 +85,11 @@ struct psp_dev { struct list_head prev_assocs; struct list_head stale_assocs; + struct { + unsigned long rotations; + unsigned long stales; + } stats; + struct rcu_head rcu; }; @@ -141,6 +150,22 @@ struct psp_assoc { u8 drv_data[] __aligned(8); }; +struct psp_dev_stats { + union { + struct { + u64 rx_packets; + u64 rx_bytes; + u64 rx_auth_fail; + u64 rx_error; + u64 rx_bad; + u64 tx_packets; + u64 tx_bytes; + u64 tx_error; + }; + DECLARE_FLEX_ARRAY(u64, required); + }; +}; + /** * struct psp_dev_ops - netdev driver facing PSP callbacks */ @@ -179,6 +204,13 @@ struct psp_dev_ops { * Remove an association from the device. */ void (*tx_key_del)(struct psp_dev *psd, struct psp_assoc *pas); + + /** + * @get_stats: get statistics from the device + * Stats required by the spec must be maintained and filled in. + * Stats must be filled in member-by-member, never memset the struct. + */ + void (*get_stats)(struct psp_dev *psd, struct psp_dev_stats *stats); }; #endif /* __NET_PSP_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cd4d4cf71d0d..5a9c826a7092 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -36,7 +36,6 @@ struct request_sock_ops { struct sk_buff *skb, enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); - void (*syn_ack_timeout)(const struct request_sock *req); }; struct saved_syn { @@ -124,14 +123,7 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, return sk; } -static inline void __reqsk_free(struct request_sock *req) -{ - req->rsk_ops->destructor(req); - if (req->rsk_listener) - sock_put(req->rsk_listener); - kfree(req->saved_syn); - kmem_cache_free(req->rsk_ops->slab, req); -} +void __reqsk_free(struct request_sock *req); static inline void reqsk_free(struct request_sock *req) { @@ -197,8 +189,6 @@ struct request_sock_queue { */ }; -void reqsk_queue_alloc(struct request_sock_queue *queue); - void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 738cd5b13c62..c3d657359a3d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -41,13 +41,6 @@ enum qdisc_state_t { __QDISC_STATE_DRAINING, }; -enum qdisc_state2_t { - /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. - * Use qdisc_run_begin/end() or qdisc_is_running() instead. - */ - __QDISC_STATE2_RUNNING, -}; - #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) #define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) @@ -95,6 +88,8 @@ struct Qdisc { #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ #define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ +#define TCQ_F_DEQUEUE_DROPS 0x400 /* ->dequeue() can drop packets in q->to_free */ + u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; @@ -110,20 +105,30 @@ struct Qdisc { int pad; refcount_t refcnt; - /* - * For performance sake on SMP, we put highly modified fields at the end - */ - struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; - struct qdisc_skb_head q; - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - int owner; - unsigned long state; - unsigned long state2; /* must be written under qdisc spinlock */ - struct Qdisc *next_sched; - struct sk_buff_head skb_bad_txq; - - spinlock_t busylock ____cacheline_aligned_in_smp; + /* Cache line potentially dirtied in dequeue() or __netif_reschedule(). */ + __cacheline_group_begin(Qdisc_read_mostly) ____cacheline_aligned; + struct sk_buff_head gso_skb; + struct Qdisc *next_sched; + struct sk_buff_head skb_bad_txq; + __cacheline_group_end(Qdisc_read_mostly); + + /* Fields dirtied in dequeue() fast path. */ + __cacheline_group_begin(Qdisc_write) ____cacheline_aligned; + struct qdisc_skb_head q; + unsigned long state; + struct gnet_stats_basic_sync bstats; + bool running; /* must be written under qdisc spinlock */ + + /* Note : we only change qstats.backlog in fast path. */ + struct gnet_stats_queue qstats; + + struct sk_buff *to_free; + __cacheline_group_end(Qdisc_write); + + + atomic_long_t defer_count ____cacheline_aligned_in_smp; + struct llist_head defer_list; + spinlock_t seqlock; struct rcu_head rcu; @@ -168,7 +173,7 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); - return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + return READ_ONCE(qdisc->running); } static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) @@ -211,11 +216,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) */ return spin_trylock(&qdisc->seqlock); } - return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + if (READ_ONCE(qdisc->running)) + return false; + WRITE_ONCE(qdisc->running, true); + return true; } -static inline void qdisc_run_end(struct Qdisc *qdisc) +static inline struct sk_buff *qdisc_run_end(struct Qdisc *qdisc) { + struct sk_buff *to_free = NULL; + if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); @@ -228,9 +238,16 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) __netif_schedule(qdisc); - } else { - __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + return NULL; } + + if (qdisc->flags & TCQ_F_DEQUEUE_DROPS) { + to_free = qdisc->to_free; + if (to_free) + qdisc->to_free = NULL; + } + WRITE_ONCE(qdisc->running, false); + return to_free; } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) @@ -432,13 +449,16 @@ struct tcf_proto { }; struct qdisc_skb_cb { - struct { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; - }; + unsigned int pkt_len; + u16 pkt_segs; + u16 tc_classid; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; + + u16 slave_dev_queue_mapping; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); @@ -696,6 +716,34 @@ void qdisc_destroy(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); + +static inline void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; + + qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } +} + #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); @@ -758,13 +806,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb) static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; + bool nolock; for (; i < dev->num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } } } } @@ -829,6 +887,15 @@ static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb) return qdisc_skb_cb(skb)->pkt_len; } +static inline unsigned int qdisc_pkt_segs(const struct sk_buff *skb) +{ + u32 pkt_segs = qdisc_skb_cb(skb)->pkt_segs; + + DEBUG_NET_WARN_ON_ONCE(pkt_segs != + (skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1)); + return pkt_segs; +} + /* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */ enum net_xmit_qdisc_t { __NET_XMIT_STOLEN = 0x00010000, @@ -870,9 +937,7 @@ static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, static inline void bstats_update(struct gnet_stats_basic_sync *bstats, const struct sk_buff *skb) { - _bstats_update(bstats, - qdisc_pkt_len(skb), - skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1); + _bstats_update(bstats, qdisc_pkt_len(skb), qdisc_pkt_segs(skb)); } static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, @@ -1067,11 +1132,8 @@ struct tc_skb_cb { struct qdisc_skb_cb qdisc_cb; u32 drop_reason; - u16 zone; /* Only valid if post_ct = true */ + u16 zone; /* Only valid if qdisc_skb_cb(skb)->post_ct = true */ u16 mru; - u8 post_ct:1; - u8 post_ct_snat:1; - u8 post_ct_dnat:1; }; static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) @@ -1094,6 +1156,28 @@ static inline void tcf_set_drop_reason(const struct sk_buff *skb, tc_skb_cb(skb)->drop_reason = reason; } +static inline void tcf_kfree_skb_list(struct sk_buff *skb) +{ + while (unlikely(skb)) { + struct sk_buff *next = skb->next; + + prefetch(next); + kfree_skb_reason(skb, tcf_get_drop_reason(skb)); + skb = next; + } +} + +static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb, + enum skb_drop_reason reason) +{ + DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS)); + DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK); + + tcf_set_drop_reason(skb, reason); + skb->next = q->to_free; + q->to_free = skb; +} + /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ @@ -1373,6 +1457,11 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, struct tcf_block *block); +static inline bool mini_qdisc_pair_inited(struct mini_Qdisc_pair *miniqp) +{ + return !!miniqp->p_miniq; +} + void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx); int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); diff --git a/include/net/sch_priv.h b/include/net/sch_priv.h new file mode 100644 index 000000000000..4789f668ae87 --- /dev/null +++ b/include/net/sch_priv.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_SCHED_PRIV_H +#define __NET_SCHED_PRIV_H + +#include <net/sch_generic.h> + +struct mq_sched { + struct Qdisc **qdiscs; +}; + +int mq_init_common(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack, + const struct Qdisc_ops *qdisc_ops); +void mq_destroy_common(struct Qdisc *sch); +void mq_attach(struct Qdisc *sch); +void mq_dump_common(struct Qdisc *sch, struct sk_buff *skb); +struct netdev_queue *mq_select_queue(struct Qdisc *sch, + struct tcmsg *tcm); +struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl); +unsigned long mq_find(struct Qdisc *sch, u32 classid); +int mq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm); +int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d); +void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg); + +#endif diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index 3d5879e08e78..6f2cd562b1de 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -72,7 +72,6 @@ struct sctp_shared_key *sctp_auth_get_shkey( int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep, struct sctp_association *asoc, gfp_t gfp); -int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp); const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id); const struct sctp_hmac * sctp_auth_asoc_get_hmac(const struct sctp_association *asoc); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e96d1bd087f6..58242b37b47a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -85,7 +85,7 @@ void sctp_udp_sock_stop(struct net *net); /* * sctp/socket.c */ -int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, +int sctp_inet_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); @@ -94,8 +94,7 @@ void sctp_data_ready(struct sock *sk); __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); -void sctp_copy_sock(struct sock *newsk, struct sock *sk, - struct sctp_association *asoc); + extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *); diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 8034bf5febbe..77806ef1cb70 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -52,10 +52,10 @@ void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch); void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); -struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); +const struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); void sctp_sched_ops_register(enum sctp_sched_type sched, - struct sctp_sched_ops *sched_ops); + const struct sctp_sched_ops *sched_ops); void sctp_sched_ops_prio_init(void); void sctp_sched_ops_rr_init(void); void sctp_sched_ops_fc_init(void); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2ae390219efd..affee44bd38e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -228,10 +228,6 @@ struct sctp_sock { atomic_t pd_mode; - /* Fields after this point will be skipped on copies, like on accept - * and peeloff operations - */ - /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; @@ -497,9 +493,6 @@ struct sctp_pf { int (*bind_verify) (struct sctp_sock *, union sctp_addr *); int (*send_verify) (struct sctp_sock *, union sctp_addr *); int (*supported_addrs)(const struct sctp_sock *, __be16 *); - struct sock *(*create_accept_sk) (struct sock *sk, - struct sctp_association *asoc, - bool kern); int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); void (*to_sk_daddr)(union sctp_addr *, struct sock *sk); @@ -1076,7 +1069,7 @@ struct sctp_outq { struct list_head out_chunk_list; /* Stream scheduler being used */ - struct sctp_sched_ops *sched; + const struct sctp_sched_ops *sched; unsigned int out_qlen; /* Total length of queued data chunks. */ diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index cddebafb9f77..6f996229167b 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -5,16 +5,47 @@ #include <linux/types.h> struct net; +extern struct net init_net; + +union tcp_seq_and_ts_off { + struct { + u32 seq; + u32 ts_off; + }; + u64 hash64; +}; u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); -u32 secure_tcpv6_ts_off(const struct net *net, - const __be32 *saddr, const __be32 *daddr); +union tcp_seq_and_ts_off +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + + return ts.seq; +} + +union tcp_seq_and_ts_off +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, + const __be32 *daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + return ts.seq; +} #endif /* _NET_SECURE_SEQ */ diff --git a/include/net/selftests.h b/include/net/selftests.h index e65e8d230d33..c36e07406ad4 100644 --- a/include/net/selftests.h +++ b/include/net/selftests.h @@ -3,9 +3,48 @@ #define _NET_SELFTESTS #include <linux/ethtool.h> +#include <linux/netdevice.h> + +struct net_packet_attrs { + const unsigned char *src; + const unsigned char *dst; + u32 ip_src; + u32 ip_dst; + bool tcp; + u16 sport; + u16 dport; + int timeout; + int size; + int max_size; + u8 id; + u16 queue_mapping; + bool bad_csum; +}; + +struct net_test_priv { + struct net_packet_attrs *packet; + struct packet_type pt; + struct completion comp; + int double_vlan; + int vlan_id; + int ok; +}; + +struct netsfhdr { + __be32 version; + __be64 magic; + u8 id; +} __packed; + +#define NET_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct netsfhdr)) +#define NET_TEST_PKT_MAGIC 0xdeadcafecafedeadULL +#define NET_LB_TIMEOUT msecs_to_jiffies(200) #if IS_ENABLED(CONFIG_NET_SELFTESTS) +struct sk_buff *net_test_get_skb(struct net_device *ndev, u8 id, + struct net_packet_attrs *attr); void net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf); int net_selftest_get_count(void); @@ -13,6 +52,12 @@ void net_selftest_get_strings(u8 *data); #else +static inline struct sk_buff *net_test_get_skb(struct net_device *ndev, u8 id, + struct net_packet_attrs *attr) +{ + return NULL; +} + static inline void net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf) { diff --git a/include/net/smc.h b/include/net/smc.h index 08bee529ed8d..bfdc4c41f019 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -17,6 +17,8 @@ #include <linux/wait.h> #include <linux/dibs.h> +struct tcp_sock; +struct inet_request_sock; struct sock; #define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ @@ -50,4 +52,55 @@ struct smcd_dev { u8 going_away : 1; }; +#define SMC_HS_CTRL_NAME_MAX 16 + +enum { + /* ops can be inherit from init_net */ + SMC_HS_CTRL_FLAG_INHERITABLE = 0x1, + + SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE, +}; + +struct smc_hs_ctrl { + /* private */ + + struct list_head list; + struct module *owner; + + /* public */ + + /* unique name */ + char name[SMC_HS_CTRL_NAME_MAX]; + int flags; + + /* Invoked before computing SMC option for SYN packets. + * We can control whether to set SMC options by returning various value. + * Return 0 to disable SMC, or return any other value to enable it. + */ + int (*syn_option)(struct tcp_sock *tp); + + /* Invoked before Set up SMC options for SYN-ACK packets + * We can control whether to respond SMC options by returning various + * value. Return 0 to disable SMC, or return any other value to enable + * it. + */ + int (*synack_option)(const struct tcp_sock *tp, + struct inet_request_sock *ireq); +}; + +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +#define smc_call_hsbpf(init_val, tp, func, ...) ({ \ + typeof(init_val) __ret = (init_val); \ + struct smc_hs_ctrl *ctrl; \ + rcu_read_lock(); \ + ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \ + if (ctrl && ctrl->func) \ + __ret = ctrl->func(tp, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + __ret; \ +}) +#else +#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); }) +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + #endif /* _SMC_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 60bcb13f045c..6c9a83016e95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -118,6 +118,7 @@ typedef __u64 __bitwise __addrpair; * @skc_reuseport: %SO_REUSEPORT setting * @skc_ipv6only: socket is IPV6 only * @skc_net_refcnt: socket is using net ref counting + * @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol @@ -174,6 +175,7 @@ struct sock_common { unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; unsigned char skc_net_refcnt:1; + unsigned char skc_bypass_prot_mem:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -303,6 +305,8 @@ struct sk_filter; * @sk_txrehash: enable TX hash rethink * @sk_filter: socket filtering instructions * @sk_timer: sock cleanup timer + * @tcp_retransmit_timer: tcp retransmit timer + * @mptcp_retransmit_timer: mptcp retransmit timer * @sk_stamp: time stamp of last packet received * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING flags @@ -313,6 +317,7 @@ struct sk_filter; * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock * for timestamping * @sk_tskey: counter to disambiguate concurrent tstamp requests + * @sk_tx_queue_mapping_jiffies: time in jiffies of last @sk_tx_queue_mapping refresh. * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. @@ -336,6 +341,7 @@ struct sk_filter; * @sk_reuseport_cb: reuseport group container * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage * @sk_rcu: used during RCU grace period + * @sk_freeptr: used for SLAB_TYPESAFE_BY_RCU managed sockets * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME @@ -380,6 +386,7 @@ struct sock { #define sk_reuseport __sk_common.skc_reuseport #define sk_ipv6only __sk_common.skc_ipv6only #define sk_net_refcnt __sk_common.skc_net_refcnt +#define sk_bypass_prot_mem __sk_common.skc_bypass_prot_mem #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot @@ -477,17 +484,21 @@ struct sock { struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; - u32 sk_dst_pending_confirm; - u32 sk_pacing_status; /* see enum sk_pacing */ struct page_frag sk_frag; - struct timer_list sk_timer; - + union { + struct timer_list sk_timer; + struct timer_list tcp_retransmit_timer; + struct timer_list mptcp_retransmit_timer; + }; unsigned long sk_pacing_rate; /* bytes per second */ atomic_t sk_zckey; atomic_t sk_tskey; + unsigned long sk_tx_queue_mapping_jiffies; __cacheline_group_end(sock_write_tx); __cacheline_group_begin(sock_read_tx); + u32 sk_dst_pending_confirm; + u32 sk_pacing_status; /* see enum sk_pacing */ unsigned long sk_max_pacing_rate; long sk_sndtimeo; u32 sk_priority; @@ -572,7 +583,14 @@ struct sock { struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct numa_drop_counters *sk_drop_counters; - struct rcu_head sk_rcu; + /* sockets using SLAB_TYPESAFE_BY_RCU can use sk_freeptr. + * By the time kfree() is called, sk_rcu can not be in + * use and can be mangled. + */ + union { + struct rcu_head sk_rcu; + freeptr_t sk_freeptr; + }; netns_tracker ns_tracker; struct xarray sk_user_frags; @@ -828,11 +846,9 @@ static inline bool sk_del_node_init(struct sock *sk) { bool rc = __sk_del_node_init(sk); - if (rc) { - /* paranoid for a while -acme */ - WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + if (rc) __sock_put(sk); - } + return rc; } #define sk_del_node_init_rcu(sk) sk_del_node_init(sk) @@ -850,14 +866,25 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) { bool rc = __sk_nulls_del_node_init_rcu(sk); - if (rc) { - /* paranoid for a while -acme */ - WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + if (rc) __sock_put(sk); - } + return rc; } +static inline bool sk_nulls_replace_node_init_rcu(struct sock *old, + struct sock *new) +{ + if (sk_hashed(old)) { + hlist_nulls_replace_init_rcu(&old->sk_nulls_node, + &new->sk_nulls_node); + __sock_put(old); + return true; + } + + return false; +} + static inline void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); @@ -1260,10 +1287,10 @@ struct proto { void (*close)(struct sock *sk, long timeout); int (*pre_connect)(struct sock *sk, - struct sockaddr *uaddr, + struct sockaddr_unsized *uaddr, int addr_len); int (*connect)(struct sock *sk, - struct sockaddr *uaddr, + struct sockaddr_unsized *uaddr, int addr_len); int (*disconnect)(struct sock *sk, int flags); @@ -1292,9 +1319,9 @@ struct proto { size_t len, int flags, int *addr_len); void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, - struct sockaddr *addr, int addr_len); + struct sockaddr_unsized *addr, int addr_len); int (*bind_add)(struct sock *sk, - struct sockaddr *addr, int addr_len); + struct sockaddr_unsized *addr, int addr_len); int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); @@ -1349,6 +1376,7 @@ struct proto { struct kmem_cache *slab; unsigned int obj_size; + unsigned int freeptr_offset; unsigned int ipv6_pinfo_offset; slab_flags_t slab_flags; unsigned int useroffset; /* Usercopy region offset */ @@ -1617,6 +1645,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) sk_mem_reclaim(sk); } +void __sk_charge(struct sock *sk, gfp_t gfp); + #if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) static inline void sk_owner_set(struct sock *sk, struct module *owner) { @@ -1808,7 +1838,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, void sk_free(struct sock *sk); void sk_net_refcnt_upgrade(struct sock *sk); void sk_destruct(struct sock *sk); -struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); +struct sock *sk_clone(const struct sock *sk, const gfp_t priority, bool lock); + +static inline struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) +{ + return sk_clone(sk, priority, true); +} struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); @@ -1901,8 +1936,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, * Functions to fill in entries in struct proto_ops when a protocol * does not implement a particular function. */ -int sock_no_bind(struct socket *, struct sockaddr *, int); -int sock_no_connect(struct socket *, struct sockaddr *, int, int); +int sock_no_bind(struct socket *sock, struct sockaddr_unsized *saddr, int len); +int sock_no_connect(struct socket *sock, struct sockaddr_unsized *saddr, int len, int flags); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *); int sock_no_getname(struct socket *, struct sockaddr *, int); @@ -1992,7 +2027,15 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) /* Paired with READ_ONCE() in sk_tx_queue_get() and * other WRITE_ONCE() because socket lock might be not held. */ - WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); + if (READ_ONCE(sk->sk_tx_queue_mapping) != tx_queue) { + WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); + WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies); + return; + } + + /* Refresh sk_tx_queue_mapping_jiffies if too old. */ + if (time_is_before_jiffies(READ_ONCE(sk->sk_tx_queue_mapping_jiffies) + HZ)) + WRITE_ONCE(sk->sk_tx_queue_mapping_jiffies, jiffies); } #define NO_QUEUE_MAPPING USHRT_MAX @@ -2005,19 +2048,7 @@ static inline void sk_tx_queue_clear(struct sock *sk) WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING); } -static inline int sk_tx_queue_get(const struct sock *sk) -{ - if (sk) { - /* Paired with WRITE_ONCE() in sk_tx_queue_clear() - * and sk_tx_queue_set(). - */ - int val = READ_ONCE(sk->sk_tx_queue_mapping); - - if (val != NO_QUEUE_MAPPING) - return val; - } - return -1; -} +int sk_tx_queue_get(const struct sock *sk); static inline void __sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb, @@ -2067,7 +2098,7 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk->sk_socket = sock; + WRITE_ONCE(sk->sk_socket, sock); if (sock) { WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); @@ -2303,6 +2334,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro return 0; } +#define SK_WMEM_ALLOC_BIAS 1 /** * sk_wmem_alloc_get - returns write allocations * @sk: socket @@ -2311,7 +2343,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro */ static inline int sk_wmem_alloc_get(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) - 1; + return refcount_read(&sk->sk_wmem_alloc) - SK_WMEM_ALLOC_BIAS; } /** @@ -2596,12 +2628,16 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); +static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc) +{ + return wmem_alloc < (READ_ONCE(sk->sk_sndbuf) >> 1); +} /* * Default write policy as shown to user space via poll/select/SIGIO */ static inline bool sock_writeable(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); + return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc)); } static inline gfp_t gfp_any(void) @@ -2635,8 +2671,12 @@ static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) #endif /* CONFIG_MEMCG_V1 */ do { - if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) + if (time_before64(get_jiffies_64(), + mem_cgroup_get_socket_pressure(memcg))) { + memcg_memory_event(mem_cgroup_from_sk(sk), + MEMCG_SOCK_THROTTLED); return true; + } } while ((memcg = parent_mem_cgroup(memcg))); return false; @@ -3085,7 +3125,7 @@ void sock_set_reuseaddr(struct sock *sk); void sock_set_reuseport(struct sock *sk); void sock_set_sndtimeo(struct sock *sk, s64 secs); -int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +int sock_bind_add(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); int sock_get_timeout(long timeo, void *optval, bool old_timeval); int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index c1a67149c6b6..e0fded18e18c 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -32,6 +32,7 @@ struct tcf_gate_params { s32 tcfg_clockid; size_t num_entries; struct list_head entries; + struct rcu_head rcu; }; #define GATE_ACT_GATE_OPEN BIT(0) @@ -39,7 +40,7 @@ struct tcf_gate_params { struct tcf_gate { struct tc_action common; - struct tcf_gate_params param; + struct tcf_gate_params __rcu *param; u8 current_gate_status; ktime_t current_close_time; u32 current_entry_octets; @@ -51,47 +52,65 @@ struct tcf_gate { #define to_gate(a) ((struct tcf_gate *)a) +static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a) +{ + struct tcf_gate *gact = to_gate(a); + + return rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); +} + static inline s32 tcf_gate_prio(const struct tc_action *a) { + struct tcf_gate_params *p; s32 tcfg_prio; - tcfg_prio = to_gate(a)->param.tcfg_priority; + p = tcf_gate_params_locked(a); + tcfg_prio = p->tcfg_priority; return tcfg_prio; } static inline u64 tcf_gate_basetime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_basetime; - tcfg_basetime = to_gate(a)->param.tcfg_basetime; + p = tcf_gate_params_locked(a); + tcfg_basetime = p->tcfg_basetime; return tcfg_basetime; } static inline u64 tcf_gate_cycletime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletime; - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; + p = tcf_gate_params_locked(a); + tcfg_cycletime = p->tcfg_cycletime; return tcfg_cycletime; } static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletimeext; - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; + p = tcf_gate_params_locked(a); + tcfg_cycletimeext = p->tcfg_cycletime_ext; return tcfg_cycletimeext; } static inline u32 tcf_gate_num_entries(const struct tc_action *a) { + struct tcf_gate_params *p; u32 num_entries; - num_entries = to_gate(a)->param.num_entries; + p = tcf_gate_params_locked(a); + num_entries = p->num_entries; return num_entries; } @@ -105,7 +124,7 @@ static inline struct action_gate_entry u32 num_entries; int i = 0; - p = &to_gate(a)->param; + p = tcf_gate_params_locked(a); num_entries = p->num_entries; list_for_each_entry(entry, &p->entries, list) @@ -114,7 +133,7 @@ static inline struct action_gate_entry if (i != num_entries) return NULL; - oe = kcalloc(num_entries, sizeof(*oe), GFP_ATOMIC); + oe = kzalloc_objs(*oe, num_entries, GFP_ATOMIC); if (!oe) return NULL; diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index c7f24a2da1ca..24d4d5a62b3c 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -13,15 +13,13 @@ struct tcf_ife_params { u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; - + struct list_head metalist; struct rcu_head rcu; }; struct tcf_ife_info { struct tc_action common; struct tcf_ife_params __rcu *params; - /* list of metaids allowed */ - struct list_head metalist; }; #define to_ife(a) ((struct tcf_ife_info *)a) diff --git a/include/net/tcp.h b/include/net/tcp.h index ab20f549b8f9..978eea2d5df0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -43,6 +43,7 @@ #include <net/dst.h> #include <net/mptcp.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -303,6 +304,9 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk) mem_cgroup_sk_under_memory_pressure(sk)) return true; + if (sk->sk_bypass_prot_mem) + return false; + return READ_ONCE(tcp_memory_pressure); } /* @@ -344,6 +348,15 @@ extern struct proto tcp_prot; #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) +/* + * TCP splice context + */ +struct tcp_splice_state { + struct pipe_inode_info *pipe; + size_t len; + unsigned int flags; +}; + void tcp_tsq_work_init(void); int tcp_v4_err(struct sk_buff *skb, u32); @@ -375,6 +388,8 @@ void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); void tcp_twsk_purge(struct list_head *net_exit_list); +int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, + unsigned int offset, size_t len); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); @@ -461,6 +476,8 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, void tcp_enter_loss(struct sock *sk); void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); +void tcp_update_pacing_rate(struct sock *sk); +void tcp_set_rto(struct sock *sk); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); @@ -528,14 +545,17 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req); + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)); int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); -int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int tcp_v4_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int tcp_connect(struct sock *sk); enum tcp_synack_type { TCP_SYNACK_NORMAL, TCP_SYNACK_FASTOPEN, TCP_SYNACK_COOKIE, + TCP_SYNACK_RETRANS, }; struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, @@ -746,7 +766,15 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_fin(struct sock *sk); -void tcp_check_space(struct sock *sk); +void __tcp_check_space(struct sock *sk); +static inline void tcp_check_space(struct sock *sk) +{ + /* pairs with tcp_poll() */ + smp_mb(); + + if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + __tcp_check_space(sk); +} void tcp_sack_compress_send_ack(struct sock *sk); static inline void tcp_cleanup_skb(struct sk_buff *skb) @@ -804,6 +832,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) /* tcp.c */ void tcp_get_info(struct sock *, struct tcp_info *); +void tcp_rate_check_app_limited(struct sock *sk); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, @@ -836,6 +865,14 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } +static inline unsigned long tcp_reqsk_timeout(struct request_sock *req) +{ + u64 timeout = (u64)req->timeout << req->num_timeout; + + return (unsigned long)min_t(u64, timeout, + tcp_rto_max(req->rsk_listener)); +} + u32 tcp_delack_max(const struct sock *sk); /* Compute the actual rto_min value */ @@ -1190,7 +1227,15 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED BIT(0) /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN BIT(1) -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Require successfully negotiated AccECN capability */ +#define TCP_CONG_NEEDS_ACCECN BIT(2) +/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */ +#define TCP_CONG_ECT_1_NEGOTIATION BIT(3) +/* Cannot fallback to RFC3168 during AccECN negotiation */ +#define TCP_CONG_NO_FALLBACK_RFC3168 BIT(4) +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \ + TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION | \ + TCP_CONG_NO_FALLBACK_RFC3168) union tcp_cc_info; @@ -1230,12 +1275,27 @@ struct rate_sample { struct tcp_congestion_ops { /* fast path fields are put first to fill one cache line */ + /* A congestion control (CC) must provide one of either: + * + * (a) a cong_avoid function, if the CC wants to use the core TCP + * stack's default functionality to implement a "classic" + * (Reno/CUBIC-style) response to packet loss, RFC3168 ECN, + * idle periods, pacing rate computations, etc. + * + * (b) a cong_control function, if the CC wants custom behavior and + * complete control of all congestion control behaviors. + */ + /* (a) "classic" response: calculate new cwnd. + */ + void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked); + /* (b) "custom" response: call when packets are delivered to update + * cwnd and pacing rate, after all the ca_state processing. + */ + void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs); + /* return slow start threshold (required) */ u32 (*ssthresh)(struct sock *sk); - /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked); - /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); @@ -1248,15 +1308,9 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ + /* override sysctl_tcp_min_tso_segs (optional) */ u32 (*min_tso_segs)(struct sock *sk); - /* call when packets are delivered to update cwnd and pacing rate, - * after all the ca_state processing. (optional) - */ - void (*cong_control)(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs); - - /* new value of cwnd after loss (required) */ u32 (*undo_cwnd)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ @@ -1322,6 +1376,27 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } +static inline bool tcp_ca_needs_accecn(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN; +} + +static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION; +} + +static inline bool tcp_ca_no_fallback_rfc3168(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_NO_FALLBACK_RFC3168; +} + static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1333,13 +1408,6 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) /* From tcp_cong.c */ void tcp_set_ca_state(struct sock *sk, const u8 ca_state); -/* From tcp_rate.c */ -void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); -void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - struct rate_sample *rs); -void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - bool is_sack_reneg, struct rate_sample *rs); -void tcp_rate_check_app_limited(struct sock *sk); static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) { @@ -1568,8 +1636,14 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); +static inline int tcp_filter(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason) +{ + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th), reason); +} -int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); int tcp_abort(struct sock *sk, int err); @@ -1896,13 +1970,6 @@ struct tcp6_pseudohdr { __be32 protocol; /* including padding */ }; -union tcp_md5sum_block { - struct tcp4_pseudohdr ip4; -#if IS_ENABLED(CONFIG_IPV6) - struct tcp6_pseudohdr ip6; -#endif -}; - /* * struct tcp_sigpool - per-CPU pool of ahash_requests * @scratch: per-CPU temporary area, that can be used between @@ -1937,8 +2004,8 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); void tcp_sigpool_end(struct tcp_sigpool *c); size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len); /* - functions */ -int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, - const struct sock *sk, const struct sk_buff *skb); +void tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags, const u8 *newkey, u8 newkeylen); @@ -1997,13 +2064,10 @@ static inline void tcp_md5_destruct_sock(struct sock *sk) } #endif -int tcp_md5_alloc_sigpool(void); -void tcp_md5_release_sigpool(void); -void tcp_md5_add_sigpool(void); -extern int tcp_md5_sigpool_id; - -int tcp_md5_hash_key(struct tcp_sigpool *hp, - const struct tcp_md5sig_key *key); +struct md5_ctx; +void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb, + unsigned int header_len); +void tcp_md5_hash_key(struct md5_ctx *ctx, const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, @@ -2310,14 +2374,11 @@ void tcp_v4_destroy_sock(struct sock *sk); struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features); -struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb); struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th); struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, struct tcphdr *th); INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); #ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); #else @@ -2353,7 +2414,7 @@ struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (const struct sock *sk, const struct sock *addr_sk); - int (*calc_md5_hash)(char *location, + void (*calc_md5_hash)(char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, const struct sk_buff *skb); @@ -2381,7 +2442,7 @@ struct tcp_request_sock_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk, const struct sock *addr_sk); - int (*calc_md5_hash) (char *location, + void (*calc_md5_hash) (char *location, const struct tcp_md5sig_key *md5, const struct sock *sk, const struct sk_buff *skb); @@ -2404,8 +2465,9 @@ struct tcp_request_sock_ops { struct flowi *fl, struct request_sock *req, u32 tw_isn); - u32 (*init_seq)(const struct sk_buff *skb); - u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); + union tcp_seq_and_ts_off (*init_seq_and_ts_off)( + const struct net *net, + const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@ -2511,10 +2573,7 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); extern bool tcp_rack_mark_lost(struct sock *sk); -extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); -extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* tcp_plb.c */ diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index f13e5cd2b1ac..e9a933641636 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -29,8 +29,15 @@ enum tcp_accecn_option { TCP_ACCECN_OPTION_DISABLED = 0, TCP_ACCECN_OPTION_MINIMUM = 1, TCP_ACCECN_OPTION_FULL = 2, + TCP_ACCECN_OPTION_PERSIST = 3, }; +/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ +static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk) +{ + __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk)); +} + static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) { /* Do not set CWR if in AccECN mode! */ @@ -60,12 +67,6 @@ static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } -/* tp->accecn_fail_mode */ -#define TCP_ACCECN_ACE_FAIL_SEND BIT(0) -#define TCP_ACCECN_ACE_FAIL_RECV BIT(1) -#define TCP_ACCECN_OPT_FAIL_SEND BIT(2) -#define TCP_ACCECN_OPT_FAIL_RECV BIT(3) - static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp) { return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND; @@ -91,11 +92,6 @@ static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode) tp->accecn_fail_mode |= mode; } -#define TCP_ACCECN_OPT_NOT_SEEN 0x0 -#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1 -#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2 -#define TCP_ACCECN_OPT_FAIL_SEEN 0x3 - static inline u8 tcp_accecn_ace(const struct tcphdr *th) { return (th->ae << 2) | (th->cwr << 1) | th->ece; @@ -169,7 +165,9 @@ static inline void tcp_accecn_third_ack(struct sock *sk, switch (ace) { case 0x0: /* Invalid value */ - tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); + if (!TCP_SKB_CB(skb)->sacked) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV | + TCP_ACCECN_OPT_FAIL_RECV); break; case 0x7: case 0x5: @@ -398,6 +396,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp) tp->received_ce_pending = 0; __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); + tp->accecn_opt_sent_w_dsack = 0; tp->accecn_minlen = 0; tp->accecn_opt_demand = 0; tp->est_ecnfield = 0; @@ -467,6 +466,26 @@ static inline u8 tcp_accecn_option_init(const struct sk_buff *skb, return TCP_ACCECN_OPT_COUNTER_SEEN; } +static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk, + const struct sk_buff *skb, u8 dsf) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); + tp->syn_ect_rcv = dsf & INET_ECN_MASK; + /* Demand Accurate ECN option in response to the SYN on the SYN/ACK + * and the TCP server will try to send one more packet with an AccECN + * Option at a later point during the connection. + */ + if (tp->rx_opt.accecn && + tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { + u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); + + tcp_accecn_saw_opt_fail_recv(tp, saw_opt); + tp->accecn_opt_demand = 2; + } +} + /* See Table 2 of the AccECN draft */ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb, const struct tcphdr *th, u8 ip_dsfield) @@ -489,32 +508,32 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); break; case 0x1: - case 0x5: /* +========+========+============+=============+ * | A | B | SYN/ACK | Feedback | * | | | B->A | Mode of A | * | | | AE CWR ECE | | * +========+========+============+=============+ - * | AccECN | Nonce | 1 0 1 | (Reserved) | * | AccECN | ECN | 0 0 1 | Classic ECN | * | Nonce | AccECN | 0 0 1 | Classic ECN | * | ECN | AccECN | 0 0 1 | Classic ECN | * +========+========+============+=============+ */ - if (tcp_ecn_mode_pending(tp)) - /* Downgrade from AccECN, or requested initially */ + if (tcp_ca_no_fallback_rfc3168(sk)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); + else tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); break; - default: - tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); - tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; - if (tp->rx_opt.accecn && - tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { - u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); - - tcp_accecn_saw_opt_fail_recv(tp, saw_opt); - tp->accecn_opt_demand = 2; + case 0x5: + if (tcp_ecn_mode_pending(tp)) { + tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); + if (INET_ECN_is_ce(ip_dsfield)) { + tp->received_ce++; + tp->received_ce_pending++; + } } + break; + default: + tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); if (INET_ECN_is_ce(ip_dsfield) && tcp_accecn_validate_syn_feedback(sk, ace, tp->syn_ect_snt)) { @@ -525,9 +544,11 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb } } -static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, +static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_ecn_mode_pending(tp)) { if (!tcp_accecn_syn_requested(th)) { /* Downgrade to classic ECN feedback */ @@ -539,7 +560,8 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); } } - if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) + if (tcp_ecn_mode_rfc3168(tp) && + (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk))) tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } @@ -561,7 +583,7 @@ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; else if (tcp_ca_needs_ecn(sk) || tcp_bpf_ca_needs_ecn(sk)) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) { TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; @@ -579,7 +601,8 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) bool use_ecn, use_accecn; u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); - use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN; + use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN || + tcp_ca_needs_accecn(sk); use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; @@ -595,7 +618,7 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) if (use_ecn) { if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; if (use_accecn) { @@ -619,12 +642,22 @@ static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) } static inline void -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, + enum tcp_synack_type synack_type) { - if (tcp_rsk(req)->accecn_ok) - tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); - else if (inet_rsk(req)->ecn_ok) - th->ece = 1; + /* Accurate ECN shall retransmit SYN/ACK with ACE=0 if the + * previously retransmitted SYN/ACK also times out. + */ + if (!req->num_timeout || synack_type != TCP_SYNACK_RETRANS) { + if (tcp_rsk(req)->accecn_ok) + tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); + else if (inet_rsk(req)->ecn_ok) + th->ece = 1; + } else if (tcp_rsk(req)->accecn_ok) { + th->ae = 0; + th->cwr = 0; + th->ece = 0; + } } static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) diff --git a/include/net/tls.h b/include/net/tls.h index c7bcdb3afad7..ebd2550280ae 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -53,6 +53,8 @@ struct tls_rec; /* Maximum data size carried in a TLS record */ #define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) +/* Minimum record size limit as per RFC8449 */ +#define TLS_MIN_RECORD_SIZE_LIM ((size_t)1 << 6) #define TLS_HEADER_SIZE 5 #define TLS_NONCE_OFFSET TLS_HEADER_SIZE @@ -226,6 +228,7 @@ struct tls_context { u8 rx_conf:3; u8 zerocopy_sendfile:1; u8 rx_no_pad:1; + u16 tx_max_payload_len; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); diff --git a/include/net/udp.h b/include/net/udp.h index cffedb3e40f2..da68702ddf6e 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -294,8 +294,7 @@ static inline int udp_lib_init_sock(struct sock *sk) up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); - up->udp_prod_queue = kcalloc(nr_node_ids, sizeof(*up->udp_prod_queue), - GFP_KERNEL); + up->udp_prod_queue = kzalloc_objs(*up->udp_prod_queue, nr_node_ids); if (!up->udp_prod_queue) return -ENOMEM; for (int i = 0; i < nr_node_ids; i++) @@ -424,7 +423,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, int *karg); int udp_init_sock(struct sock *sk); -int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); @@ -527,18 +526,18 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, * SNMP statistics for UDP and UDP-Lite */ #define UDP_INC_STATS(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ + if (unlikely(is_udplite)) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP_INC_STATS(net, field, is_udplite) do { \ - if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ + if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) #define __UDP6_INC_STATS(net, field, is_udplite) do { \ - if (is_udplite) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field);\ + if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) #define UDP6_INC_STATS(net, field, __lite) do { \ - if (__lite) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ + if (unlikely(__lite)) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ } while(0) diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 9acef2fbd2fd..fc1fc43345b5 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -10,6 +10,11 @@ #include <net/ipv6_stubs.h> #endif +#define UDP_TUNNEL_PARTIAL_FEATURES NETIF_F_GSO_ENCAP_ALL +#define UDP_TUNNEL_STRIPPED_GSO_TYPES ((UDP_TUNNEL_PARTIAL_FEATURES | \ + NETIF_F_GSO_PARTIAL) >> \ + NETIF_F_GSO_SHIFT) + struct udp_port_cfg { u8 family; @@ -47,7 +52,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { - return 0; + return -EPFNOSUPPORT; } #endif @@ -145,6 +150,33 @@ void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, __be16 src_port, __be16 dst_port, bool nocheck, u16 ip6cb_flags); +static inline bool udp_tunnel_handle_partial(struct sk_buff *skb) +{ + bool double_encap = !!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL); + + /* + * If the skb went through partial segmentation, lower devices + * will not need to offload the related features - except for + * UDP_TUNNEL, that will be re-added by the later + * udp_tunnel_handle_offloads(). + */ + if (double_encap) + skb_shinfo(skb)->gso_type &= ~UDP_TUNNEL_STRIPPED_GSO_TYPES; + return double_encap; +} + +static inline void udp_tunnel_set_inner_protocol(struct sk_buff *skb, + bool double_encap, + __be16 inner_proto) +{ + /* + * The inner protocol has been set by the nested tunnel, don't + * overraid it. + */ + if (!double_encap) + skb_set_inner_protocol(skb, inner_proto); +} + void udp_tunnel_sock_release(struct socket *sock); struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, diff --git a/include/net/vsock_addr.h b/include/net/vsock_addr.h index cf8cc140d68d..c3f4cc206198 100644 --- a/include/net/vsock_addr.h +++ b/include/net/vsock_addr.h @@ -16,7 +16,7 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr); void vsock_addr_unbind(struct sockaddr_vm *addr); bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, const struct sockaddr_vm *other); -int vsock_addr_cast(const struct sockaddr *addr, size_t len, +int vsock_addr_cast(const struct sockaddr_unsized *addr, size_t len, struct sockaddr_vm **out_addr); #endif diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index ce587a225661..23e8861e8b25 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -125,6 +125,7 @@ struct xsk_tx_metadata_ops { int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(struct list_head *flush_list); +INDIRECT_CALLABLE_DECLARE(void xsk_destruct_skb(struct sk_buff *)); /** * xsk_tx_metadata_to_compl - Save enough relevant metadata information @@ -218,6 +219,12 @@ static inline void __xsk_map_flush(struct list_head *flush_list) { } +#ifdef CONFIG_MITIGATION_RETPOLINE +static inline void xsk_destruct_skb(struct sk_buff *skb) +{ +} +#endif + static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, struct xsk_tx_metadata_compl *compl) { diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 4f2d3268a676..6b9ebae2dc95 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -12,6 +12,10 @@ #define XDP_UMEM_MIN_CHUNK_SHIFT 11 #define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT) +#define NETDEV_XDP_ACT_XSK (NETDEV_XDP_ACT_BASIC | \ + NETDEV_XDP_ACT_REDIRECT | \ + NETDEV_XDP_ACT_XSK_ZEROCOPY) + struct xsk_cb_desc { void *src; u8 off; @@ -47,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { @@ -118,7 +127,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) goto out; list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { - list_del(&pos->list_node); + list_del_init(&pos->list_node); xp_free(pos); } @@ -153,7 +162,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) frag = list_first_entry_or_null(&xskb->pool->xskb_list, struct xdp_buff_xsk, list_node); if (frag) { - list_del(&frag->list_node); + list_del_init(&frag->list_node); ret = &frag->xdp; } @@ -164,7 +173,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); - list_del(&xskb->list_node); + list_del_init(&xskb->list_node); } static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) @@ -333,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return 0; } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0a14daaa5dd4..10d3edde6b2f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1156,19 +1156,19 @@ struct xfrm_offload { #define CRYPTO_INVALID_PROTOCOL 128 /* Used to keep whole l2 header for transport mode GRO */ - __u32 orig_mac_len; + __u16 orig_mac_len; __u8 proto; __u8 inner_ipproto; }; struct sec_path { - int len; - int olen; - int verified_cnt; - struct xfrm_state *xvec[XFRM_MAX_DEPTH]; struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; + + u8 len; + u8 olen; + u8 verified_cnt; }; struct sec_path *secpath_set(struct sk_buff *skb); diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index cac56e6b0869..0b1abdb99c9e 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -85,11 +85,11 @@ struct xsk_buff_pool { bool unaligned; bool tx_sw_csum; void *addrs; - /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: - * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when - * sockets share a single cq when the same netdev and queue id is shared. + /* Mutual exclusion of the completion ring in the SKB mode. + * Protect: NAPI TX thread and sendmsg error paths in the SKB + * destructor callback. */ - spinlock_t cq_lock; + spinlock_t cq_prod_lock; struct xdp_buff_xsk *free_heads[]; }; |
