summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/addrconf.h29
-rw-r--r--include/net/af_rxrpc.h56
-rw-r--r--include/net/af_unix.h81
-rw-r--r--include/net/af_vsock.h3
-rw-r--r--include/net/ax25.h1
-rw-r--r--include/net/bluetooth/bluetooth.h5
-rw-r--r--include/net/bluetooth/hci.h54
-rw-r--r--include/net/bluetooth/hci_core.h288
-rw-r--r--include/net/bluetooth/hci_drv.h76
-rw-r--r--include/net/bluetooth/hci_mon.h2
-rw-r--r--include/net/bluetooth/hci_sync.h8
-rw-r--r--include/net/bluetooth/l2cap.h10
-rw-r--r--include/net/bluetooth/mgmt.h1
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/busy_poll.h21
-rw-r--r--include/net/cfg80211.h203
-rw-r--r--include/net/checksum.h14
-rw-r--r--include/net/devlink.h35
-rw-r--r--include/net/dropreason-core.h107
-rw-r--r--include/net/dropreason.h6
-rw-r--r--include/net/dsa.h11
-rw-r--r--include/net/dst.h26
-rw-r--r--include/net/dst_metadata.h7
-rw-r--r--include/net/fib_rules.h29
-rw-r--r--include/net/flow.h2
-rw-r--r--include/net/genetlink.h6
-rw-r--r--include/net/gro.h38
-rw-r--r--include/net/hotdata.h1
-rw-r--r--include/net/inet6_connection_sock.h2
-rw-r--r--include/net/inet6_hashtables.h2
-rw-r--r--include/net/inet_connection_sock.h33
-rw-r--r--include/net/inet_frag.h6
-rw-r--r--include/net/inet_hashtables.h18
-rw-r--r--include/net/inet_sock.h8
-rw-r--r--include/net/inet_timewait_sock.h4
-rw-r--r--include/net/ip.h26
-rw-r--r--include/net/ip6_fib.h1
-rw-r--r--include/net/ip_fib.h5
-rw-r--r--include/net/ip_tunnels.h19
-rw-r--r--include/net/ip_vs.h13
-rw-r--r--include/net/ipcomp.h13
-rw-r--r--include/net/ipv6.h24
-rw-r--r--include/net/ipv6_frag.h5
-rw-r--r--include/net/iucv/iucv.h30
-rw-r--r--include/net/kcm.h1
-rw-r--r--include/net/l3mdev.h27
-rw-r--r--include/net/libeth/rx.h47
-rw-r--r--include/net/lwtunnel.h9
-rw-r--r--include/net/mac80211.h126
-rw-r--r--include/net/macsec.h4
-rw-r--r--include/net/mana/gdma.h65
-rw-r--r--include/net/mana/hw_channel.h9
-rw-r--r--include/net/mana/mana.h11
-rw-r--r--include/net/mctp.h2
-rw-r--r--include/net/mptcp.h26
-rw-r--r--include/net/net_namespace.h9
-rw-r--r--include/net/netdev_lock.h138
-rw-r--r--include/net/netdev_netlink.h12
-rw-r--r--include/net/netdev_queues.h43
-rw-r--r--include/net/netdev_rx_queue.h7
-rw-r--r--include/net/netfilter/nf_conntrack.h33
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h12
-rw-r--r--include/net/netfilter/nf_flow_table.h3
-rw-r--r--include/net/netfilter/nf_tables.h15
-rw-r--r--include/net/netfilter/nf_tproxy.h4
-rw-r--r--include/net/netfilter/nft_fib.h30
-rw-r--r--include/net/netlink.h81
-rw-r--r--include/net/netmem.h133
-rw-r--r--include/net/netns/ipv4.h16
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--include/net/nexthop.h2
-rw-r--r--include/net/p8022.h16
-rw-r--r--include/net/page_pool/helpers.h93
-rw-r--r--include/net/page_pool/memory_provider.h51
-rw-r--r--include/net/page_pool/types.h23
-rw-r--r--include/net/pkt_cls.h4
-rw-r--r--include/net/pkt_sched.h25
-rw-r--r--include/net/route.h47
-rw-r--r--include/net/rps.h31
-rw-r--r--include/net/rstreason.h2
-rw-r--r--include/net/rtnetlink.h40
-rw-r--r--include/net/sch_generic.h39
-rw-r--r--include/net/scm.h121
-rw-r--r--include/net/sctp/checksum.h32
-rw-r--r--include/net/sctp/sctp.h4
-rw-r--r--include/net/sctp/sm.h1
-rw-r--r--include/net/sctp/structs.h5
-rw-r--r--include/net/secure_seq.h4
-rw-r--r--include/net/snmp.h5
-rw-r--r--include/net/sock.h144
-rw-r--r--include/net/strparser.h4
-rw-r--r--include/net/tc_act/tc_ctinfo.h6
-rw-r--r--include/net/tcp.h137
-rw-r--r--include/net/tls.h3
-rw-r--r--include/net/udp.h25
-rw-r--r--include/net/udp_tunnel.h15
-rw-r--r--include/net/vxlan.h6
-rw-r--r--include/net/xdp.h198
-rw-r--r--include/net/xdp_sock.h13
-rw-r--r--include/net/xdp_sock_drv.h73
-rw-r--r--include/net/xfrm.h98
-rw-r--r--include/net/xsk_buff_pool.h16
102 files changed, 2490 insertions, 987 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 363dd63babe7..9e5e95988b9e 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -88,6 +88,23 @@ struct ifa6_config {
u16 scope;
};
+enum addr_type_t {
+ UNICAST_ADDR,
+ MULTICAST_ADDR,
+ ANYCAST_ADDR,
+};
+
+struct inet6_fill_args {
+ u32 portid;
+ u32 seq;
+ int event;
+ unsigned int flags;
+ int netnsid;
+ int ifindex;
+ enum addr_type_t type;
+ bool force_rt_scope_universe;
+};
+
int addrconf_init(void);
void addrconf_cleanup(void);
@@ -330,6 +347,11 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
return rcu_dereference_rtnl(dev->ip6_ptr);
}
+static inline struct inet6_dev *__in6_dev_get_rtnl_net(const struct net_device *dev)
+{
+ return rtnl_net_dereference(dev_net(dev), dev->ip6_ptr);
+}
+
/**
* __in6_dev_stats_get - get inet6_dev pointer for stats
* @dev: network device
@@ -525,4 +547,11 @@ int if6_proc_init(void);
void if6_proc_exit(void);
#endif
+int inet6_fill_ifmcaddr(struct sk_buff *skb,
+ const struct ifmcaddr6 *ifmca,
+ struct inet6_fill_args *args);
+
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
+ struct inet6_fill_args *args);
#endif
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 0754c463224a..0fb4c41c9bbf 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -16,6 +16,7 @@ struct sock;
struct socket;
struct rxrpc_call;
struct rxrpc_peer;
+struct krb5_buffer;
enum rxrpc_abort_reason;
enum rxrpc_interruptibility {
@@ -24,23 +25,33 @@ enum rxrpc_interruptibility {
RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */
};
+enum rxrpc_oob_type {
+ RXRPC_OOB_CHALLENGE, /* Security challenge for a connection */
+};
+
/*
* Debug ID counter for tracing.
*/
extern atomic_t rxrpc_debug_id;
+/*
+ * Operations table for rxrpc to call out to a kernel application (e.g. kAFS).
+ */
+struct rxrpc_kernel_ops {
+ void (*notify_new_call)(struct sock *sk, struct rxrpc_call *call,
+ unsigned long user_call_ID);
+ void (*discard_new_call)(struct rxrpc_call *call, unsigned long user_call_ID);
+ void (*user_attach_call)(struct rxrpc_call *call, unsigned long user_call_ID);
+ void (*notify_oob)(struct sock *sk, struct sk_buff *oob);
+};
+
typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
unsigned long);
typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *,
unsigned long);
-typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *,
- unsigned long);
-typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long);
-typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long);
-void rxrpc_kernel_new_call_notification(struct socket *,
- rxrpc_notify_new_call_t,
- rxrpc_discard_new_call_t);
+void rxrpc_kernel_set_notifications(struct socket *sock,
+ const struct rxrpc_kernel_ops *app_ops);
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct rxrpc_peer *peer,
struct key *key,
@@ -69,17 +80,36 @@ struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer);
struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call);
const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer);
const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer);
+unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data);
+unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer);
unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *);
-int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
- rxrpc_user_attach_call_t, unsigned long, gfp_t,
- unsigned int);
+int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx,
+ unsigned long user_call_ID, gfp_t gfp,
+ unsigned int debug_id);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
-u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
-void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
- unsigned long);
int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
int rxrpc_sock_set_security_keyring(struct sock *, struct key *);
+int rxrpc_sock_set_manage_response(struct sock *sk, bool set);
+
+enum rxrpc_oob_type rxrpc_kernel_query_oob(struct sk_buff *oob,
+ struct rxrpc_peer **_peer,
+ unsigned long *_peer_appdata);
+struct sk_buff *rxrpc_kernel_dequeue_oob(struct socket *sock,
+ enum rxrpc_oob_type *_type);
+void rxrpc_kernel_free_oob(struct sk_buff *oob);
+void rxrpc_kernel_query_challenge(struct sk_buff *challenge,
+ struct rxrpc_peer **_peer,
+ unsigned long *_peer_appdata,
+ u16 *_service_id, u8 *_security_index);
+int rxrpc_kernel_reject_challenge(struct sk_buff *challenge, u32 abort_code,
+ int error, enum rxrpc_abort_reason why);
+int rxkad_kernel_respond_to_challenge(struct sk_buff *challenge);
+u32 rxgk_kernel_query_challenge(struct sk_buff *challenge);
+int rxgk_kernel_respond_to_challenge(struct sk_buff *challenge,
+ struct krb5_buffer *appdata);
+u8 rxrpc_kernel_query_call_security(struct rxrpc_call *call,
+ u16 *_service_id, u32 *_enctype);
#endif /* _NET_RXRPC_H */
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 63129c79b8cb..1af1841b7601 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -2,11 +2,15 @@
#ifndef __LINUX_NET_AFUNIX_H
#define __LINUX_NET_AFUNIX_H
-#include <linux/socket.h>
-#include <linux/un.h>
+#include <linux/atomic.h>
#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/path.h>
#include <linux/refcount.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
#include <net/sock.h>
+#include <uapi/linux/un.h>
#if IS_ENABLED(CONFIG_UNIX)
struct unix_sock *unix_get_socket(struct file *filp);
@@ -17,61 +21,17 @@ static inline struct unix_sock *unix_get_socket(struct file *filp)
}
#endif
-extern unsigned int unix_tot_inflight;
-void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
-void unix_del_edges(struct scm_fp_list *fpl);
-void unix_update_edges(struct unix_sock *receiver);
-int unix_prepare_fpl(struct scm_fp_list *fpl);
-void unix_destroy_fpl(struct scm_fp_list *fpl);
-void unix_gc(void);
-void wait_for_unix_gc(struct scm_fp_list *fpl);
-
-struct unix_vertex {
- struct list_head edges;
- struct list_head entry;
- struct list_head scc_entry;
- unsigned long out_degree;
- unsigned long index;
- unsigned long scc_index;
-};
-
-struct unix_edge {
- struct unix_sock *predecessor;
- struct unix_sock *successor;
- struct list_head vertex_entry;
- struct list_head stack_entry;
-};
-
-struct sock *unix_peer_get(struct sock *sk);
-
-#define UNIX_HASH_MOD (256 - 1)
-#define UNIX_HASH_SIZE (256 * 2)
-#define UNIX_HASH_BITS 8
-
struct unix_address {
refcount_t refcnt;
int len;
struct sockaddr_un name[];
};
-struct unix_skb_parms {
- struct pid *pid; /* Skb credentials */
- kuid_t uid;
- kgid_t gid;
- struct scm_fp_list *fp; /* Passed files */
-#ifdef CONFIG_SECURITY_NETWORK
- u32 secid; /* Security ID */
-#endif
- u32 consumed;
-} __randomize_layout;
-
struct scm_stat {
atomic_t nr_fds;
unsigned long nr_unix_fds;
};
-#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
-
/* The AF_UNIX socket */
struct unix_sock {
/* WARNING: sk has to be the first member */
@@ -84,6 +44,7 @@ struct unix_sock {
struct unix_vertex *vertex;
spinlock_t lock;
struct socket_wq peer_wq;
+#define peer_wait peer_wq.wait
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
@@ -97,32 +58,4 @@ struct unix_sock {
#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-#define peer_wait peer_wq.wait
-
-long unix_inq_len(struct sock *sk);
-long unix_outq_len(struct sock *sk);
-
-int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
- int flags);
-int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
- int flags);
-#ifdef CONFIG_SYSCTL
-int unix_sysctl_register(struct net *net);
-void unix_sysctl_unregister(struct net *net);
-#else
-static inline int unix_sysctl_register(struct net *net) { return 0; }
-static inline void unix_sysctl_unregister(struct net *net) {}
-#endif
-
-#ifdef CONFIG_BPF_SYSCALL
-extern struct proto unix_dgram_proto;
-extern struct proto unix_stream_proto;
-
-int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
-int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
-void __init unix_bpf_build_proto(void);
-#else
-static inline void __init unix_bpf_build_proto(void)
-{}
-#endif
#endif
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 9e85424c8343..d40e978126e3 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -221,6 +221,7 @@ void vsock_for_each_connected_socket(struct vsock_transport *transport,
void (*fn)(struct sock *sk));
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
bool vsock_find_cid(unsigned int cid);
+void vsock_linger(struct sock *sk);
/**** TAP ****/
@@ -242,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags);
-#ifdef CONFIG_BPF_SYSCALL
extern struct proto vsock_proto;
+#ifdef CONFIG_BPF_SYSCALL
int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init vsock_bpf_build_proto(void);
#else
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 4ee141aae0a2..a7bba42dde15 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -418,7 +418,6 @@ void ax25_rt_device_down(struct net_device *);
int ax25_rt_ioctl(unsigned int, void __user *);
extern const struct seq_operations ax25_rt_seqops;
ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev);
-int ax25_rt_autobind(ax25_cb *, ax25_address *);
struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *,
ax25_address *, ax25_digi *);
void ax25_rt_free(void);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 435250c72d56..114299bd8b98 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -29,6 +29,7 @@
#include <linux/poll.h>
#include <net/sock.h>
#include <linux/seq_file.h>
+#include <linux/ethtool.h>
#define BT_SUBSYS_VERSION 2
#define BT_SUBSYS_REVISION 22
@@ -156,6 +157,7 @@ struct bt_voice {
#define BT_PKT_STATUS 16
#define BT_SCM_PKT_STATUS 0x03
+#define BT_SCM_ERROR 0x04
#define BT_ISO_QOS 17
@@ -447,6 +449,9 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
hci_req_complete_t *req_complete,
hci_req_complete_skb_t *req_complete_skb);
+int hci_ethtool_ts_info(unsigned int index, int sk_proto,
+ struct kernel_ethtool_ts_info *ts_info);
+
#define HCI_REQ_START BIT(0)
#define HCI_REQ_SKB BIT(1)
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 6203bd8663b7..8fa829873134 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -208,6 +208,13 @@ enum {
*/
HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+ /* When this quirk is set consider Sync Flow Control as supported by
+ * the driver.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED,
+
/* When this quirk is set, the LE states reported through the
* HCI_LE_READ_SUPPORTED_STATES are invalid/broken.
*
@@ -354,6 +361,24 @@ enum {
* during the hdev->setup vendor callback.
*/
HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
+
+ /* When this quirk is set, the HCI_OP_READ_VOICE_SETTING command is
+ * skipped. This is required for a subset of the CSR controller clones
+ * which erroneously claim to support it.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_BROKEN_READ_VOICE_SETTING,
+
+ /* When this quirk is set, the HCI_OP_READ_PAGE_SCAN_TYPE command is
+ * skipped. This is required for a subset of the CSR controller clones
+ * which erroneously claim to support it.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE,
+
+ __HCI_NUM_QUIRKS,
};
/* HCI device flags */
@@ -432,13 +457,13 @@ enum {
HCI_WIDEBAND_SPEECH_ENABLED,
HCI_EVENT_FILTER_CONFIGURED,
HCI_PA_SYNC,
+ HCI_SCO_FLOWCTL,
HCI_DUT_MODE,
HCI_VENDOR_DIAG,
HCI_FORCE_BREDR_SMP,
HCI_FORCE_STATIC_ADDR,
HCI_LL_RPA_RESOLUTION,
- HCI_ENABLE_LL_PRIVACY,
HCI_CMD_PENDING,
HCI_FORCE_NO_MITM,
HCI_QUALITY_REPORT,
@@ -471,6 +496,7 @@ enum {
#define HCI_EVENT_PKT 0x04
#define HCI_ISODATA_PKT 0x05
#define HCI_DIAG_PKT 0xf0
+#define HCI_DRV_PKT 0xf1
#define HCI_VENDOR_PKT 0xff
/* HCI packet types */
@@ -534,7 +560,8 @@ enum {
#define ESCO_LINK 0x02
/* Low Energy links do not have defined link type. Use invented one */
#define LE_LINK 0x80
-#define ISO_LINK 0x82
+#define CIS_LINK 0x82
+#define BIS_LINK 0x83
#define INVALID_LINK 0xff
/* LMP features */
@@ -684,7 +711,7 @@ enum {
#define HCI_ERROR_REMOTE_POWER_OFF 0x15
#define HCI_ERROR_LOCAL_HOST_TERM 0x16
#define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18
-#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e
+#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1a
#define HCI_ERROR_INVALID_LL_PARAMS 0x1e
#define HCI_ERROR_UNSPECIFIED 0x1f
#define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c
@@ -856,6 +883,11 @@ struct hci_cp_remote_name_req_cancel {
bdaddr_t bdaddr;
} __packed;
+struct hci_rp_remote_name_req_cancel {
+ __u8 status;
+ bdaddr_t bdaddr;
+} __packed;
+
#define HCI_OP_READ_REMOTE_FEATURES 0x041b
struct hci_cp_read_remote_features {
__le16 handle;
@@ -1529,6 +1561,11 @@ struct hci_rp_read_tx_power {
__s8 tx_power;
} __packed;
+#define HCI_OP_WRITE_SYNC_FLOWCTL 0x0c2f
+struct hci_cp_write_sync_flowctl {
+ __u8 enable;
+} __packed;
+
#define HCI_OP_READ_PAGE_SCAN_TYPE 0x0c46
struct hci_rp_read_page_scan_type {
__u8 status;
@@ -1898,6 +1935,8 @@ struct hci_cp_le_pa_create_sync {
__u8 sync_cte_type;
} __packed;
+#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045
+
#define HCI_OP_LE_PA_TERM_SYNC 0x2046
struct hci_cp_le_pa_term_sync {
__le16 handle;
@@ -2595,6 +2634,7 @@ struct hci_ev_le_conn_complete {
#define LE_EXT_ADV_DIRECT_IND 0x0004
#define LE_EXT_ADV_SCAN_RSP 0x0008
#define LE_EXT_ADV_LEGACY_PDU 0x0010
+#define LE_EXT_ADV_DATA_STATUS_MASK 0x0060
#define LE_EXT_ADV_EVT_TYPE_MASK 0x007f
#define ADDR_LE_DEV_PUBLIC 0x00
@@ -2797,7 +2837,7 @@ struct hci_evt_le_create_big_complete {
__le16 bis_handle[];
} __packed;
-#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d
+#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d
struct hci_evt_le_big_sync_estabilished {
__u8 status;
__u8 handle;
@@ -2812,6 +2852,12 @@ struct hci_evt_le_big_sync_estabilished {
__le16 bis[];
} __packed;
+#define HCI_EVT_LE_BIG_SYNC_LOST 0x1e
+struct hci_evt_le_big_sync_lost {
+ __u8 handle;
+ __u8 reason;
+} __packed;
+
#define HCI_EVT_LE_BIG_INFO_ADV_REPORT 0x22
struct hci_evt_le_big_info_adv_report {
__le16 sync_handle;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ca22ead85dbe..f22881bf1b39 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -29,8 +29,11 @@
#include <linux/idr.h>
#include <linux/leds.h>
#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/srcu.h>
#include <net/bluetooth/hci.h>
+#include <net/bluetooth/hci_drv.h>
#include <net/bluetooth/hci_sync.h>
#include <net/bluetooth/hci_sock.h>
#include <net/bluetooth/coredump.h>
@@ -92,6 +95,7 @@ struct discovery_state {
u16 uuid_count;
u8 (*uuids)[16];
unsigned long name_resolve_timeout;
+ spinlock_t lock;
};
#define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */
@@ -157,8 +161,9 @@ struct bdaddr_list_with_irk {
/* Bitmask of connection flags */
enum hci_conn_flags {
- HCI_CONN_FLAG_REMOTE_WAKEUP = 1,
- HCI_CONN_FLAG_DEVICE_PRIVACY = 2,
+ HCI_CONN_FLAG_REMOTE_WAKEUP = BIT(0),
+ HCI_CONN_FLAG_DEVICE_PRIVACY = BIT(1),
+ HCI_CONN_FLAG_ADDRESS_RESOLUTION = BIT(2),
};
typedef u8 hci_conn_flags_t;
@@ -240,6 +245,7 @@ struct adv_info {
__u8 mesh;
__u8 instance;
__u8 handle;
+ __u8 sid;
__u32 flags;
__u16 timeout;
__u16 remaining_time;
@@ -260,6 +266,12 @@ struct adv_info {
struct delayed_work rpa_expired_cb;
};
+struct tx_queue {
+ struct sk_buff_head queue;
+ unsigned int extra;
+ unsigned int tracked;
+};
+
#define HCI_MAX_ADV_INSTANCES 5
#define HCI_DEFAULT_ADV_DURATION 2
@@ -338,6 +350,7 @@ struct adv_monitor {
struct hci_dev {
struct list_head list;
+ struct srcu_struct srcu;
struct mutex lock;
struct ida unset_handle_ida;
@@ -453,7 +466,7 @@ struct hci_dev {
unsigned int auto_accept_delay;
- unsigned long quirks;
+ DECLARE_BITMAP(quirk_flags, __HCI_NUM_QUIRKS);
atomic_t cmd_cnt;
unsigned int acl_cnt;
@@ -538,6 +551,7 @@ struct hci_dev {
struct hci_conn_hash conn_hash;
struct list_head mesh_pending;
+ struct mutex mgmt_pending_lock;
struct list_head mgmt_pending;
struct list_head reject_list;
struct list_head accept_list;
@@ -606,6 +620,8 @@ struct hci_dev {
struct list_head monitored_devices;
bool advmon_pend_notify;
+ struct hci_drv *hci_drv;
+
#if IS_ENABLED(CONFIG_BT_LEDS)
struct led_trigger *power_led;
#endif
@@ -632,7 +648,6 @@ struct hci_dev {
int (*post_init)(struct hci_dev *hdev);
int (*set_diag)(struct hci_dev *hdev, bool enable);
int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr);
- void (*cmd_timeout)(struct hci_dev *hdev);
void (*reset)(struct hci_dev *hdev);
bool (*wakeup)(struct hci_dev *hdev);
int (*set_quality_report)(struct hci_dev *hdev, bool enable);
@@ -643,6 +658,10 @@ struct hci_dev {
u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb);
};
+#define hci_set_quirk(hdev, nr) set_bit((nr), (hdev)->quirk_flags)
+#define hci_clear_quirk(hdev, nr) clear_bit((nr), (hdev)->quirk_flags)
+#define hci_test_quirk(hdev, nr) test_bit((nr), (hdev)->quirk_flags)
+
#define HCI_PHY_HANDLE(handle) (handle & 0xff)
enum conn_reasons {
@@ -733,6 +752,8 @@ struct hci_conn {
struct sk_buff_head data_q;
struct list_head chan_list;
+ struct tx_queue tx_q;
+
struct delayed_work disc_work;
struct delayed_work auto_accept_work;
struct delayed_work idle_work;
@@ -804,6 +825,7 @@ struct hci_conn_params {
extern struct list_head hci_dev_list;
extern struct list_head hci_cb_list;
extern rwlock_t hci_dev_list_lock;
+extern struct mutex hci_cb_list_lock;
#define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags)
#define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags)
@@ -813,20 +835,20 @@ extern rwlock_t hci_dev_list_lock;
#define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags)
#define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags)
-#define hci_dev_clear_volatile_flags(hdev) \
- do { \
- hci_dev_clear_flag(hdev, HCI_LE_SCAN); \
- hci_dev_clear_flag(hdev, HCI_LE_ADV); \
- hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\
- hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \
- hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \
+#define hci_dev_clear_volatile_flags(hdev) \
+ do { \
+ hci_dev_clear_flag((hdev), HCI_LE_SCAN); \
+ hci_dev_clear_flag((hdev), HCI_LE_ADV); \
+ hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \
+ hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \
+ hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \
} while (0)
#define hci_dev_le_state_simultaneous(hdev) \
- (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \
- (hdev->le_states[4] & 0x08) && /* Central */ \
- (hdev->le_states[4] & 0x40) && /* Peripheral */ \
- (hdev->le_states[3] & 0x10)) /* Simultaneous */
+ (!hci_test_quirk((hdev), HCI_QUIRK_BROKEN_LE_STATES) && \
+ ((hdev)->le_states[4] & 0x08) && /* Central */ \
+ ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \
+ ((hdev)->le_states[3] & 0x10)) /* Simultaneous */
/* ----- HCI interface to upper protocols ----- */
int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
@@ -869,6 +891,7 @@ static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb,
static inline void discovery_init(struct hci_dev *hdev)
{
+ spin_lock_init(&hdev->discovery.lock);
hdev->discovery.state = DISCOVERY_STOPPED;
INIT_LIST_HEAD(&hdev->discovery.all);
INIT_LIST_HEAD(&hdev->discovery.unknown);
@@ -883,8 +906,11 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev)
hdev->discovery.report_invalid_rssi = true;
hdev->discovery.rssi = HCI_RSSI_INVALID;
hdev->discovery.uuid_count = 0;
+
+ spin_lock(&hdev->discovery.lock);
kfree(hdev->discovery.uuids);
hdev->discovery.uuids = NULL;
+ spin_unlock(&hdev->discovery.lock);
}
bool hci_discovery_active(struct hci_dev *hdev);
@@ -987,7 +1013,8 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c)
case ESCO_LINK:
h->sco_num++;
break;
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
h->iso_num++;
break;
}
@@ -1013,7 +1040,8 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c)
case ESCO_LINK:
h->sco_num--;
break;
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
h->iso_num--;
break;
}
@@ -1030,7 +1058,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type)
case SCO_LINK:
case ESCO_LINK:
return h->sco_num;
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
return h->iso_num;
default:
return 0;
@@ -1091,7 +1120,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (bacmp(&c->dst, ba) || c->type != ISO_LINK)
+ if (bacmp(&c->dst, ba) || c->type != BIS_LINK)
continue;
if (c->iso_qos.bcast.bis == bis) {
@@ -1104,10 +1133,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
return NULL;
}
-static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev,
- __u8 sid,
- bdaddr_t *dst,
- __u8 dst_type)
+static inline struct hci_conn *
+hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
@@ -1115,8 +1142,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK || bacmp(&c->dst, dst) ||
- c->dst_type != dst_type || c->sid != sid)
+ if (c->type != BIS_LINK)
+ continue;
+
+ if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags))
continue;
rcu_read_unlock();
@@ -1139,8 +1168,8 @@ hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (bacmp(&c->dst, ba) || c->type != ISO_LINK ||
- !test_bit(HCI_CONN_PER_ADV, &c->flags))
+ if (bacmp(&c->dst, ba) || c->type != BIS_LINK ||
+ !test_bit(HCI_CONN_PER_ADV, &c->flags))
continue;
if (c->iso_qos.bcast.big == big &&
@@ -1229,7 +1258,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
+ if (c->type != CIS_LINK)
continue;
/* Match CIG ID if set */
@@ -1261,7 +1290,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
+ if (c->type != CIS_LINK)
continue;
if (handle == c->iso_qos.ucast.cig) {
@@ -1284,17 +1313,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
- continue;
-
- /* An ISO_LINK hcon with BDADDR_ANY as destination
- * address is a Broadcast connection. A Broadcast
- * slave connection is associated with a PA train,
- * so the sync_handle can be used to differentiate
- * from unicast.
- */
- if (bacmp(&c->dst, BDADDR_ANY) &&
- c->sync_handle == HCI_SYNC_HANDLE_INVALID)
+ if (c->type != BIS_LINK)
continue;
if (handle == c->iso_qos.bcast.big) {
@@ -1318,7 +1337,7 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != BIS_LINK)
continue;
if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) {
@@ -1333,7 +1352,8 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev,
}
static inline struct hci_conn *
-hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state)
+hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state,
+ __u8 role)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
@@ -1341,8 +1361,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK ||
- c->state != state)
+ if (c->type != BIS_LINK || c->state != state || c->role != role)
continue;
if (handle == c->iso_qos.bcast.big) {
@@ -1365,8 +1384,8 @@ hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK ||
- !test_bit(HCI_CONN_PA_SYNC, &c->flags))
+ if (c->type != BIS_LINK ||
+ !test_bit(HCI_CONN_PA_SYNC, &c->flags))
continue;
if (c->iso_qos.bcast.big == big) {
@@ -1388,7 +1407,7 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != BIS_LINK)
continue;
/* Ignore the listen hcon, we are looking
@@ -1515,8 +1534,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
void hci_sco_setup(struct hci_conn *conn, __u8 status);
bool hci_iso_setup_path(struct hci_conn *conn);
int hci_le_create_cis_pending(struct hci_dev *hdev);
-int hci_pa_create_sync_pending(struct hci_dev *hdev);
-int hci_le_big_create_sync_pending(struct hci_dev *hdev);
int hci_conn_check_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
@@ -1547,19 +1564,20 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
u16 timeout);
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
-struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid,
struct bt_iso_qos *qos,
__u8 base_len, __u8 *base);
struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 dst_type, __u8 sid,
+ struct bt_iso_qos *qos,
__u8 data_len, __u8 *data);
struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid, struct bt_iso_qos *qos);
-int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
- struct bt_iso_qos *qos,
- __u16 sync_handle, __u8 num_bis, __u8 bis[]);
+int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos, __u16 sync_handle,
+ __u8 num_bis, __u8 bis[]);
int hci_conn_check_link_mode(struct hci_conn *conn);
int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level);
int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
@@ -1571,6 +1589,18 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
void hci_conn_failed(struct hci_conn *conn, u8 status);
u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle);
+void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb);
+void hci_conn_tx_dequeue(struct hci_conn *conn);
+void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset,
+ const struct sockcm_cookie *sockc);
+
+static inline void hci_sockcm_init(struct sockcm_cookie *sockc, struct sock *sk)
+{
+ *sockc = (struct sockcm_cookie) {
+ .tsflags = READ_ONCE(sk->sk_tsflags),
+ };
+}
+
/*
* hci_conn_get() and hci_conn_put() are used to control the life-time of an
* "hci_conn" object. They do not guarantee that the hci_conn object is running,
@@ -1759,8 +1789,6 @@ int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type);
int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
u8 type);
-int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr,
- u8 type);
void hci_bdaddr_list_clear(struct list_head *list);
struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
@@ -1781,6 +1809,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
void hci_uuids_clear(struct hci_dev *hdev);
void hci_link_keys_clear(struct hci_dev *hdev);
+u8 *hci_conn_key_enc_size(struct hci_conn *conn);
struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr);
struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn,
bdaddr_t *bdaddr, u8 *val, u8 type,
@@ -1817,6 +1846,7 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr,
void hci_adv_instances_clear(struct hci_dev *hdev);
struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance);
+struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid);
struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance);
struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
u32 flags, u16 adv_data_len, u8 *adv_data,
@@ -1824,7 +1854,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
u16 timeout, u16 duration, s8 tx_power,
u32 min_interval, u32 max_interval,
u8 mesh_handle);
-struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
+struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid,
u32 flags, u8 data_len, u8 *data,
u32 min_interval, u32 max_interval);
int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance,
@@ -1859,6 +1889,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define lmp_hold_capable(dev) ((dev)->features[0][0] & LMP_HOLD)
#define lmp_sniff_capable(dev) ((dev)->features[0][0] & LMP_SNIFF)
#define lmp_park_capable(dev) ((dev)->features[0][1] & LMP_PARK)
+#define lmp_sco_capable(dev) ((dev)->features[0][1] & LMP_SCO)
#define lmp_inq_rssi_capable(dev) ((dev)->features[0][3] & LMP_RSSI_INQ)
#define lmp_esco_capable(dev) ((dev)->features[0][3] & LMP_ESCO)
#define lmp_bredr_capable(dev) (!((dev)->features[0][4] & LMP_NO_BREDR))
@@ -1911,40 +1942,40 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M))
#define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \
- !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \
- &(dev)->quirks))
+ !hci_test_quirk((dev), \
+ HCI_QUIRK_BROKEN_LE_CODED))
#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY)
-/* Use LL Privacy based address resolution if supported */
-#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \
- hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY))
-
-#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \
- (hdev->commands[39] & 0x04))
+#define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \
+ ((dev)->commands[39] & 0x04))
#define read_key_size_capable(dev) \
((dev)->commands[20] & 0x10 && \
- !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE))
+
+#define read_voice_setting_capable(dev) \
+ ((dev)->commands[9] & 0x04 && \
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_READ_VOICE_SETTING))
/* Use enhanced synchronous connection if command is supported and its quirk
* has not been set.
*/
#define enhanced_sync_conn_capable(dev) \
(((dev)->commands[29] & 0x08) && \
- !test_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &(dev)->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN))
/* Use ext scanning if set ext scan param and ext scan enable is supported */
#define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
((dev)->commands[37] & 0x40) && \
- !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_SCAN))
/* Use ext create connection if command is supported */
#define use_ext_conn(dev) (((dev)->commands[37] & 0x80) && \
- !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, &(dev)->quirks))
+ !hci_test_quirk((dev), HCI_QUIRK_BROKEN_EXT_CREATE_CONN))
/* Extended advertising support */
#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
@@ -1959,8 +1990,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
*/
#define use_enhanced_conn_complete(dev) ((ll_privacy_capable(dev) || \
ext_adv_capable(dev)) && \
- !test_bit(HCI_QUIRK_BROKEN_EXT_CREATE_CONN, \
- &(dev)->quirks))
+ !hci_test_quirk((dev), \
+ HCI_QUIRK_BROKEN_EXT_CREATE_CONN))
/* Periodic advertising support */
#define per_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_PERIODIC_ADV))
@@ -1977,7 +2008,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER)
#define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \
- (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks)))
+ (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG)))
/* ----- HCI protocols ----- */
#define HCI_PROTO_DEFER 0x01
@@ -1993,7 +2024,8 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
case ESCO_LINK:
return sco_connect_ind(hdev, bdaddr, flags);
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
return iso_connect_ind(hdev, bdaddr, flags);
default:
@@ -2016,47 +2048,24 @@ struct hci_cb {
char *name;
- bool (*match) (struct hci_conn *conn);
void (*connect_cfm) (struct hci_conn *conn, __u8 status);
void (*disconn_cfm) (struct hci_conn *conn, __u8 status);
void (*security_cfm) (struct hci_conn *conn, __u8 status,
- __u8 encrypt);
+ __u8 encrypt);
void (*key_change_cfm) (struct hci_conn *conn, __u8 status);
void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role);
};
-static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list)
-{
- struct hci_cb *cb, *cpy;
-
- rcu_read_lock();
- list_for_each_entry_rcu(cb, &hci_cb_list, list) {
- if (cb->match && cb->match(conn)) {
- cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC);
- if (!cpy)
- break;
-
- *cpy = *cb;
- INIT_LIST_HEAD(&cpy->list);
- list_add_rcu(&cpy->list, list);
- }
- }
- rcu_read_unlock();
-}
-
static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->connect_cfm)
cb->connect_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->connect_cfm_cb)
conn->connect_cfm_cb(conn, status);
@@ -2064,43 +2073,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
+ struct hci_cb *cb;
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->disconn_cfm)
cb->disconn_cfm(conn, reason);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->disconn_cfm_cb)
conn->disconn_cfm_cb(conn, reason);
}
-static inline void hci_security_cfm(struct hci_conn *conn, __u8 status,
- __u8 encrypt)
-{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
- if (cb->security_cfm)
- cb->security_cfm(conn, status, encrypt);
- kfree(cb);
- }
-
- if (conn->security_cfm_cb)
- conn->security_cfm_cb(conn, status);
-}
-
static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
@@ -2108,11 +2096,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00;
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (conn->state == BT_CONFIG) {
@@ -2139,38 +2136,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
conn->sec_level = conn->pending_sec_level;
}
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
+ struct hci_cb *cb;
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->key_change_cfm)
cb->key_change_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
__u8 role)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->role_switch_cfm)
cb->role_switch_cfm(conn, status, role);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type)
@@ -2392,8 +2391,6 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status);
void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
u8 status);
void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status);
-void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status);
-void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status);
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len,
@@ -2419,7 +2416,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
u8 instance);
void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
u8 instance);
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle);
int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
bdaddr_t *bdaddr, u8 addr_type);
diff --git a/include/net/bluetooth/hci_drv.h b/include/net/bluetooth/hci_drv.h
new file mode 100644
index 000000000000..2f01c44f05ec
--- /dev/null
+++ b/include/net/bluetooth/hci_drv.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 Google Corporation
+ */
+
+#ifndef __HCI_DRV_H
+#define __HCI_DRV_H
+
+#include <linux/types.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci.h>
+
+struct hci_drv_cmd_hdr {
+ __le16 opcode;
+ __le16 len;
+} __packed;
+
+struct hci_drv_ev_hdr {
+ __le16 opcode;
+ __le16 len;
+} __packed;
+
+#define HCI_DRV_EV_CMD_STATUS 0x0000
+struct hci_drv_ev_cmd_status {
+ __le16 opcode;
+ __u8 status;
+} __packed;
+
+#define HCI_DRV_EV_CMD_COMPLETE 0x0001
+struct hci_drv_ev_cmd_complete {
+ __le16 opcode;
+ __u8 status;
+ __u8 data[];
+} __packed;
+
+#define HCI_DRV_STATUS_SUCCESS 0x00
+#define HCI_DRV_STATUS_UNSPECIFIED_ERROR 0x01
+#define HCI_DRV_STATUS_UNKNOWN_COMMAND 0x02
+#define HCI_DRV_STATUS_INVALID_PARAMETERS 0x03
+
+#define HCI_DRV_MAX_DRIVER_NAME_LENGTH 32
+
+/* Common commands that make sense on all drivers start from 0x0000 */
+#define HCI_DRV_OP_READ_INFO 0x0000
+#define HCI_DRV_READ_INFO_SIZE 0
+struct hci_drv_rp_read_info {
+ __u8 driver_name[HCI_DRV_MAX_DRIVER_NAME_LENGTH];
+ __le16 num_supported_commands;
+ __le16 supported_commands[];
+} __packed;
+
+/* Driver specific OGF (Opcode Group Field)
+ * Commands in this group may have different meanings across different drivers.
+ */
+#define HCI_DRV_OGF_DRIVER_SPECIFIC 0x01
+
+int hci_drv_cmd_status(struct hci_dev *hdev, u16 cmd, u8 status);
+int hci_drv_cmd_complete(struct hci_dev *hdev, u16 cmd, u8 status, void *rp,
+ size_t rp_len);
+int hci_drv_process_cmd(struct hci_dev *hdev, struct sk_buff *cmd_skb);
+
+struct hci_drv_handler {
+ int (*func)(struct hci_dev *hdev, void *data, u16 data_len);
+ size_t data_len;
+};
+
+struct hci_drv {
+ size_t common_handler_count;
+ const struct hci_drv_handler *common_handlers;
+
+ size_t specific_handler_count;
+ const struct hci_drv_handler *specific_handlers;
+};
+
+#endif /* __HCI_DRV_H */
diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h
index 082f89531b88..bbd752494ef9 100644
--- a/include/net/bluetooth/hci_mon.h
+++ b/include/net/bluetooth/hci_mon.h
@@ -51,6 +51,8 @@ struct hci_mon_hdr {
#define HCI_MON_CTRL_EVENT 17
#define HCI_MON_ISO_TX_PKT 18
#define HCI_MON_ISO_RX_PKT 19
+#define HCI_MON_DRV_TX_PKT 20
+#define HCI_MON_DRV_RX_PKT 21
struct hci_mon_new_index {
__u8 type;
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index f3052cb252ef..5224f57f6af2 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -115,8 +115,8 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance);
int hci_enable_advertising_sync(struct hci_dev *hdev);
int hci_enable_advertising(struct hci_dev *hdev);
-int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
- u8 *data, u32 flags, u16 min_interval,
+int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid,
+ u8 data_len, u8 *data, u32 flags, u16 min_interval,
u16 max_interval, u16 sync_interval);
int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance);
@@ -140,7 +140,6 @@ int hci_update_scan(struct hci_dev *hdev);
int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul);
int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk);
-int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance);
struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext,
struct sock *sk);
@@ -186,3 +185,6 @@ int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn);
int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn);
int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn,
struct hci_conn_params *params);
+
+int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn);
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d9c767cf773d..4bb0eaedda18 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -38,8 +38,8 @@
#define L2CAP_DEFAULT_TX_WINDOW 63
#define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF
#define L2CAP_DEFAULT_MAX_TX 3
-#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */
-#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */
+#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */
+#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */
#define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */
#define L2CAP_DEFAULT_ACK_TO 200
#define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF
@@ -668,7 +668,7 @@ struct l2cap_conn {
struct l2cap_chan *smp;
struct list_head chan_l;
- struct mutex chan_lock;
+ struct mutex lock;
struct kref ref;
struct list_head users;
};
@@ -955,7 +955,8 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason);
int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
bdaddr_t *dst, u8 dst_type, u16 timeout);
int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu);
-int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len);
+int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
+ const struct sockcm_cookie *sockc);
void l2cap_chan_busy(struct l2cap_chan *chan, int busy);
void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail);
int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator);
@@ -970,6 +971,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err);
void l2cap_send_conn_req(struct l2cap_chan *chan);
struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn);
+struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *conn);
void l2cap_conn_put(struct l2cap_conn *conn);
int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index affac861efdc..3575cd16049a 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -113,6 +113,7 @@ struct mgmt_rp_read_index_list {
#define MGMT_SETTING_CIS_PERIPHERAL BIT(19)
#define MGMT_SETTING_ISO_BROADCASTER BIT(20)
#define MGMT_SETTING_ISO_SYNC_RECEIVER BIT(21)
+#define MGMT_SETTING_LL_PRIVACY BIT(22)
#define MGMT_OP_READ_INFO 0x0004
#define MGMT_READ_INFO_SIZE 0
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 8bb5f016969f..95f67b308c19 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -695,6 +695,7 @@ void bond_debug_register(struct bonding *bond);
void bond_debug_unregister(struct bonding *bond);
void bond_debug_reregister(struct bonding *bond);
const char *bond_mode_name(int mode);
+bool bond_xdp_check(struct bonding *bond, int mode);
void bond_setup(struct net_device *bond_dev);
unsigned int bond_get_num_tx_queues(void);
int bond_netlink_init(void);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c39a426ebf52..6e172d0f6ef5 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -24,6 +24,11 @@
*/
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
+static inline bool napi_id_valid(unsigned int napi_id)
+{
+ return napi_id >= MIN_NAPI_ID;
+}
+
#define BUSY_POLL_BUDGET 8
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -114,7 +119,7 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
- if (napi_id >= MIN_NAPI_ID)
+ if (napi_id_valid(napi_id))
napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
READ_ONCE(sk->sk_prefer_busy_poll),
READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
@@ -122,18 +127,24 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
}
/* used in the NIC receive handler to mark the skb */
-static inline void skb_mark_napi_id(struct sk_buff *skb,
- struct napi_struct *napi)
+static inline void __skb_mark_napi_id(struct sk_buff *skb,
+ const struct gro_node *gro)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
/* If the skb was already marked with a valid NAPI ID, avoid overwriting
* it.
*/
- if (skb->napi_id < MIN_NAPI_ID)
- skb->napi_id = napi->napi_id;
+ if (!napi_id_valid(skb->napi_id))
+ skb->napi_id = gro->cached_napi_id;
#endif
}
+static inline void skb_mark_napi_id(struct sk_buff *skb,
+ const struct napi_struct *napi)
+{
+ __skb_mark_napi_id(skb, &napi->gro);
+}
+
/* used in the protocol handler to propagate the napi_id to the socket */
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 27acf1292a5c..1faeb587988f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <linux/ethtool.h>
@@ -127,6 +127,8 @@ struct wiphy;
* even if it is otherwise disabled.
* @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation
* with very low power (VLP), even if otherwise set to NO_IR.
+ * @IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY: Allow activity on a 20 MHz channel,
+ * even if otherwise set to NO_IR.
*/
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = BIT(0),
@@ -155,6 +157,7 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23),
IEEE80211_CHAN_CAN_MONITOR = BIT(24),
IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25),
+ IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY = BIT(26),
};
#define IEEE80211_CHAN_NO_HT40 \
@@ -630,7 +633,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
const struct ieee80211_sband_iftype_data *data;
int i;
- if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
+ if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
return NULL;
if (iftype == NL80211_IFTYPE_AP_VLAN)
@@ -1006,6 +1009,17 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
/**
+ * cfg80211_chandef_get_width - return chandef width in MHz
+ * @c: chandef to return bandwidth for
+ * Return: channel width in MHz for the given chandef; note that it returns
+ * 80 for 80+80 configurations
+ */
+static inline int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
+{
+ return nl80211_chan_width_to_mhz(c->width);
+}
+
+/**
* cfg80211_chandef_valid - check if a channel definition is valid
* @chandef: the channel definition to check
* Return: %true if the channel definition is valid. %false otherwise.
@@ -1083,43 +1097,6 @@ int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef,
int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef);
/**
- * ieee80211_chanwidth_rate_flags - return rate flags for channel width
- * @width: the channel width of the channel
- *
- * In some channel types, not all rates may be used - for example CCK
- * rates may not be used in 5/10 MHz channels.
- *
- * Returns: rate flags which apply for this channel width
- */
-static inline enum ieee80211_rate_flags
-ieee80211_chanwidth_rate_flags(enum nl80211_chan_width width)
-{
- switch (width) {
- case NL80211_CHAN_WIDTH_5:
- return IEEE80211_RATE_SUPPORTS_5MHZ;
- case NL80211_CHAN_WIDTH_10:
- return IEEE80211_RATE_SUPPORTS_10MHZ;
- default:
- break;
- }
- return 0;
-}
-
-/**
- * ieee80211_chandef_rate_flags - returns rate flags for a channel
- * @chandef: channel definition for the channel
- *
- * See ieee80211_chanwidth_rate_flags().
- *
- * Returns: rate flags which apply for this channel
- */
-static inline enum ieee80211_rate_flags
-ieee80211_chandef_rate_flags(struct cfg80211_chan_def *chandef)
-{
- return ieee80211_chanwidth_rate_flags(chandef->width);
-}
-
-/**
* ieee80211_chandef_max_power - maximum transmission power for the chandef
*
* In some regulations, the transmit power may depend on the configured channel
@@ -1286,11 +1263,13 @@ struct cfg80211_crypto_settings {
* struct cfg80211_mbssid_config - AP settings for multi bssid
*
* @tx_wdev: pointer to the transmitted interface in the MBSSID set
+ * @tx_link_id: link ID of the transmitted profile in an MLD.
* @index: index of this AP in the multi bssid group.
* @ema: set to true if the beacons should be sent out in EMA mode.
*/
struct cfg80211_mbssid_config {
struct wireless_dev *tx_wdev;
+ u8 tx_link_id;
u8 index;
bool ema;
};
@@ -1756,6 +1735,9 @@ struct cfg80211_ttlm_params {
* @supported_oper_classes_len: number of supported operating classes
* @support_p2p_ps: information if station supports P2P PS mechanism
* @airtime_weight: airtime scheduler weight for this station
+ * @eml_cap_present: Specifies if EML capabilities field (@eml_cap) is
+ * present/updated
+ * @eml_cap: EML capabilities of this station
* @link_sta_params: link related params.
*/
struct station_parameters {
@@ -1780,6 +1762,8 @@ struct station_parameters {
u8 supported_oper_classes_len;
int support_p2p_ps;
u16 airtime_weight;
+ bool eml_cap_present;
+ u16 eml_cap;
struct link_station_parameters link_sta_params;
};
@@ -2045,9 +2029,6 @@ struct cfg80211_tid_stats {
* @assoc_at: bootime (ns) of the last association
* @rx_bytes: bytes (size of MPDUs) received from this station
* @tx_bytes: bytes (size of MPDUs) transmitted to this station
- * @llid: mesh local link id
- * @plid: mesh peer link id
- * @plink_state: mesh peer link state
* @signal: The signal strength, type depends on the wiphy's signal_type.
* For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_.
* @signal_avg: Average signal strength, type depends on the wiphy's signal_type.
@@ -2067,14 +2048,20 @@ struct cfg80211_tid_stats {
* This number should increase every time the list of stations
* changes, i.e. when a station is added or removed, so that
* userspace can tell whether it got a consistent snapshot.
+ * @beacon_loss_count: Number of times beacon loss event has triggered.
* @assoc_req_ies: IEs from (Re)Association Request.
* This is used only when in AP mode with drivers that do not use
* user space MLME/SME implementation. The information is provided for
* the cfg80211_new_sta() calls to notify user space of the IEs.
* @assoc_req_ies_len: Length of assoc_req_ies buffer in octets.
* @sta_flags: station flags mask & values
- * @beacon_loss_count: Number of times beacon loss event has triggered.
* @t_offset: Time offset of the station relative to this host.
+ * @llid: mesh local link id
+ * @plid: mesh peer link id
+ * @plink_state: mesh peer link state
+ * @connected_to_gate: true if mesh STA has a path to mesh gate
+ * @connected_to_as: true if mesh STA has a path to authentication server
+ * @airtime_link_metric: mesh airtime link metric.
* @local_pm: local mesh STA power save mode
* @peer_pm: peer mesh STA power save mode
* @nonpeer_pm: non-peer mesh STA power save mode
@@ -2083,7 +2070,6 @@ struct cfg80211_tid_stats {
* @rx_beacon: number of beacons received from this peer
* @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received
* from this peer
- * @connected_to_gate: true if mesh STA has a path to mesh gate
* @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
* @tx_duration: aggregate PPDU duration(usecs) for all the frames to a peer
* @airtime_weight: current airtime scheduling weight
@@ -2097,8 +2083,6 @@ struct cfg80211_tid_stats {
* @fcs_err_count: number of packets (MPDUs) received from this station with
* an FCS error. This counter should be incremented only when TA of the
* received packet with an FCS error matches the peer MAC address.
- * @airtime_link_metric: mesh airtime link metric.
- * @connected_to_as: true if mesh STA has a path to authentication server
* @mlo_params_valid: Indicates @assoc_link_id and @mld_addr fields are filled
* by driver. Drivers use this only in cfg80211_new_sta() calls when AP
* MLD's MLME/SME is offload to driver. Drivers won't fill this
@@ -2125,9 +2109,6 @@ struct station_info {
u64 assoc_at;
u64 rx_bytes;
u64 tx_bytes;
- u16 llid;
- u16 plid;
- u8 plink_state;
s8 signal;
s8 signal_avg;
@@ -2147,36 +2128,38 @@ struct station_info {
int generation;
+ u32 beacon_loss_count;
+
const u8 *assoc_req_ies;
size_t assoc_req_ies_len;
- u32 beacon_loss_count;
s64 t_offset;
+ u16 llid;
+ u16 plid;
+ u8 plink_state;
+ u8 connected_to_gate;
+ u8 connected_to_as;
+ u32 airtime_link_metric;
enum nl80211_mesh_power_mode local_pm;
enum nl80211_mesh_power_mode peer_pm;
enum nl80211_mesh_power_mode nonpeer_pm;
u32 expected_throughput;
- u64 tx_duration;
- u64 rx_duration;
- u64 rx_beacon;
- u8 rx_beacon_signal_avg;
- u8 connected_to_gate;
+ u16 airtime_weight;
- struct cfg80211_tid_stats *pertid;
s8 ack_signal;
s8 avg_ack_signal;
+ struct cfg80211_tid_stats *pertid;
- u16 airtime_weight;
+ u64 tx_duration;
+ u64 rx_duration;
+ u64 rx_beacon;
+ u8 rx_beacon_signal_avg;
u32 rx_mpdu_count;
u32 fcs_err_count;
- u32 airtime_link_metric;
-
- u8 connected_to_as;
-
bool mlo_params_valid;
u8 assoc_link_id;
u8 mld_addr[ETH_ALEN] __aligned(2);
@@ -2265,7 +2248,7 @@ static inline int cfg80211_get_station(struct net_device *dev,
* @MONITOR_FLAG_PLCPFAIL: pass frames with bad PLCP
* @MONITOR_FLAG_CONTROL: pass control frames
* @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering
- * @MONITOR_FLAG_COOK_FRAMES: report frames after processing
+ * @MONITOR_FLAG_COOK_FRAMES: deprecated, will unconditionally be refused
* @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address
* @MONITOR_FLAG_SKIP_TX: do not pass locally transmitted frames
*/
@@ -2707,7 +2690,7 @@ struct cfg80211_scan_request {
s8 tsf_report_link_id;
/* keep last */
- struct ieee80211_channel *channels[] __counted_by(n_channels);
+ struct ieee80211_channel *channels[];
};
static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
@@ -2947,6 +2930,7 @@ struct cfg80211_bss_ies {
* @nontrans_list: list of non-transmitted BSS, if this is a transmitted one
* (multi-BSSID support)
* @signal: signal strength value (type depends on the wiphy's signal_type)
+ * @ts_boottime: timestamp of the last BSS update in nanoseconds since boot
* @chains: bitmask for filled values in @chain_signal.
* @chain_signal: per-chain signal strength of last received BSS in dBm.
* @bssid_index: index in the multiple BSS set
@@ -2971,6 +2955,8 @@ struct cfg80211_bss {
s32 signal;
+ u64 ts_boottime;
+
u16 beacon_interval;
u16 capability;
@@ -3023,6 +3009,10 @@ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id)
*
* @bss: The BSS to authenticate with, the callee must obtain a reference
* to it if it needs to keep it.
+ * @supported_selectors: List of selectors that should be assumed to be
+ * supported by the station.
+ * SAE_H2E must be assumed supported if set to %NULL.
+ * @supported_selectors_len: Length of supported_selectors in octets.
* @auth_type: Authentication type (algorithm)
* @ie: Extra IEs to add to Authentication frame or %NULL
* @ie_len: Length of ie buffer in octets
@@ -3045,6 +3035,8 @@ struct cfg80211_auth_request {
struct cfg80211_bss *bss;
const u8 *ie;
size_t ie_len;
+ const u8 *supported_selectors;
+ u8 supported_selectors_len;
enum nl80211_auth_type auth_type;
const u8 *key;
u8 key_len;
@@ -3075,6 +3067,19 @@ struct cfg80211_assoc_link {
};
/**
+ * struct cfg80211_ml_reconf_req - MLO link reconfiguration request
+ * @add_links: data for links to add, see &struct cfg80211_assoc_link
+ * @rem_links: bitmap of links to remove
+ * @ext_mld_capa_ops: extended MLD capabilities and operations set by
+ * userspace for the ML reconfiguration action frame
+ */
+struct cfg80211_ml_reconf_req {
+ struct cfg80211_assoc_link add_links[IEEE80211_MLD_MAX_NUM_LINKS];
+ u16 rem_links;
+ u16 ext_mld_capa_ops;
+};
+
+/**
* enum cfg80211_assoc_req_flags - Over-ride default behaviour in association.
*
* @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n)
@@ -3124,6 +3129,10 @@ enum cfg80211_assoc_req_flags {
* included in the Current AP address field of the Reassociation Request
* frame.
* @flags: See &enum cfg80211_assoc_req_flags
+ * @supported_selectors: supported BSS selectors in IEEE 802.11 format
+ * (or %NULL for no change).
+ * If %NULL, then support for SAE_H2E should be assumed.
+ * @supported_selectors_len: number of supported BSS selectors
* @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask
* will be used in ht_capa. Un-supported values will be ignored.
* @ht_capa_mask: The bits of ht_capa which are to be used.
@@ -3142,6 +3151,8 @@ enum cfg80211_assoc_req_flags {
* the link on which the association request should be sent
* @ap_mld_addr: AP MLD address in case of MLO association request,
* valid iff @link_id >= 0
+ * @ext_mld_capa_ops: extended MLD capabilities and operations set by
+ * userspace for the association
*/
struct cfg80211_assoc_request {
struct cfg80211_bss *bss;
@@ -3150,6 +3161,8 @@ struct cfg80211_assoc_request {
struct cfg80211_crypto_settings crypto;
bool use_mfp;
u32 flags;
+ const u8 *supported_selectors;
+ u8 supported_selectors_len;
struct ieee80211_ht_cap ht_capa;
struct ieee80211_ht_cap ht_capa_mask;
struct ieee80211_vht_cap vht_capa, vht_capa_mask;
@@ -3160,6 +3173,7 @@ struct cfg80211_assoc_request {
struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS];
const u8 *ap_mld_addr;
s8 link_id;
+ u16 ext_mld_capa_ops;
};
/**
@@ -4582,8 +4596,18 @@ struct mgmt_frame_regs {
*
* @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
* @set_ttlm: set the TID to link mapping.
+ * @set_epcs: Enable/Disable EPCS for station mode.
* @get_radio_mask: get bitmask of radios in use.
* (invoked with the wiphy mutex held)
+ * @assoc_ml_reconf: Request a non-AP MLO connection to perform ML
+ * reconfiguration, i.e., add and/or remove links to/from the
+ * association using ML reconfiguration action frames. Successfully added
+ * links will be added to the set of valid links. Successfully removed
+ * links will be removed from the set of valid links. The driver must
+ * indicate removed links by calling cfg80211_links_removed() and added
+ * links by calling cfg80211_mlo_reconf_add_done(). When calling
+ * cfg80211_mlo_reconf_add_done() the bss pointer must be given for each
+ * link for which MLO reconfiguration 'add' operation was requested.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4733,7 +4757,7 @@ struct cfg80211_ops {
int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev,
enum nl80211_tx_power_setting type, int mbm);
int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev,
- int *dbm);
+ unsigned int link_id, int *dbm);
void (*rfkill_poll)(struct wiphy *wiphy);
@@ -4947,6 +4971,10 @@ struct cfg80211_ops {
int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ttlm_params *params);
u32 (*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev);
+ int (*assoc_ml_reconf)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_ml_reconf_req *req);
+ int (*set_epcs)(struct wiphy *wiphy, struct net_device *dev,
+ bool val);
};
/*
@@ -5957,7 +5985,7 @@ int wiphy_register(struct wiphy *wiphy);
* @wiphy: the wiphy to check the locking on
* @p: The pointer to read, prior to dereferencing
*
- * Return the value of the specified RCU-protected pointer, but omit the
+ * Return: the value of the specified RCU-protected pointer, but omit the
* READ_ONCE(), because caller holds the wiphy mutex used for updates.
*/
#define wiphy_dereference(wiphy, p) \
@@ -6031,6 +6059,10 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
mutex_unlock(&wiphy->mtx);
}
+DEFINE_GUARD(wiphy, struct wiphy *,
+ mutex_lock(&_T->mtx),
+ mutex_unlock(&_T->mtx))
+
struct wiphy_work;
typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *);
@@ -9701,6 +9733,40 @@ static inline int cfg80211_color_change_notify(struct net_device *dev,
void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
/**
+ * struct cfg80211_mlo_reconf_done_data - MLO reconfiguration data
+ * @buf: MLO Reconfiguration Response frame (header + body)
+ * @len: length of the frame data
+ * @added_links: BIT mask of links successfully added to the association
+ * @links: per-link information indexed by link ID
+ * @links.bss: the BSS that MLO reconfiguration was requested for, ownership of
+ * the pointer moves to cfg80211 in the call to
+ * cfg80211_mlo_reconf_add_done().
+ *
+ * The BSS pointer must be set for each link for which 'add' operation was
+ * requested in the assoc_ml_reconf callback.
+ */
+struct cfg80211_mlo_reconf_done_data {
+ const u8 *buf;
+ size_t len;
+ u16 added_links;
+ struct {
+ struct cfg80211_bss *bss;
+ u8 *addr;
+ } links[IEEE80211_MLD_MAX_NUM_LINKS];
+};
+
+/**
+ * cfg80211_mlo_reconf_add_done - Notify about MLO reconfiguration result
+ * @dev: network device.
+ * @data: MLO reconfiguration done data, &struct cfg80211_mlo_reconf_done_data
+ *
+ * Inform cfg80211 and the userspace that processing of ML reconfiguration
+ * request to add links to the association is done.
+ */
+void cfg80211_mlo_reconf_add_done(struct net_device *dev,
+ struct cfg80211_mlo_reconf_done_data *data);
+
+/**
* cfg80211_schedule_channels_check - schedule regulatory check if needed
* @wdev: the wireless device to check
*
@@ -9710,6 +9776,13 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
*/
void cfg80211_schedule_channels_check(struct wireless_dev *wdev);
+/**
+ * cfg80211_epcs_changed - Notify about a change in EPCS state
+ * @netdev: the wireless device whose EPCS state changed
+ * @enabled: set to true if EPCS was enabled, otherwise set to false.
+ */
+void cfg80211_epcs_changed(struct net_device *netdev, bool enabled);
+
#ifdef CONFIG_CFG80211_DEBUGFS
/**
* wiphy_locked_debugfs_read - do a locked read in debugfs
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 243f972267b8..3cbab35de5ab 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -99,12 +99,6 @@ csum_block_add(__wsum csum, __wsum csum2, int offset)
}
static __always_inline __wsum
-csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
-{
- return csum_block_add(csum, csum2, offset);
-}
-
-static __always_inline __wsum
csum_block_sub(__wsum csum, __wsum csum2, int offset)
{
return csum_block_add(csum, ~csum2, offset);
@@ -115,12 +109,6 @@ static __always_inline __wsum csum_unfold(__sum16 n)
return (__force __wsum)n;
}
-static __always_inline
-__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
-{
- return csum_partial(buff, len, sum);
-}
-
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
@@ -164,7 +152,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr);
+ __wsum diff, bool pseudohdr, bool ipv6);
static __always_inline
void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index fbb9a2668e24..0091f23a40f7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -35,7 +35,7 @@ struct devlink_port_phys_attrs {
/**
* struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
* @controller: Associated controller number
- * @pf: Associated PCI PF number for this port.
+ * @pf: associated PCI function number for the devlink port instance
* @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_pf_attrs {
@@ -47,8 +47,9 @@ struct devlink_port_pci_pf_attrs {
/**
* struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
* @controller: Associated controller number
- * @pf: Associated PCI PF number for this port.
- * @vf: Associated PCI VF for of the PCI PF for this port.
+ * @pf: associated PCI function number for the devlink port instance
+ * @vf: associated PCI VF number of a PF for the devlink port instance;
+ * VF number starts from 0 for the first PCI virtual function
* @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_vf_attrs {
@@ -61,8 +62,8 @@ struct devlink_port_pci_vf_attrs {
/**
* struct devlink_port_pci_sf_attrs - devlink port's PCI SF attributes
* @controller: Associated controller number
- * @sf: Associated PCI SF for of the PCI PF for this port.
- * @pf: Associated PCI PF number for this port.
+ * @sf: associated SF number of a PF for the devlink port instance
+ * @pf: associated PCI function number for the devlink port instance
* @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_sf_attrs {
@@ -419,11 +420,11 @@ typedef u64 devlink_resource_occ_get_t(void *priv);
#define __DEVLINK_PARAM_MAX_STRING_VALUE 32
enum devlink_param_type {
- DEVLINK_PARAM_TYPE_U8,
- DEVLINK_PARAM_TYPE_U16,
- DEVLINK_PARAM_TYPE_U32,
- DEVLINK_PARAM_TYPE_STRING,
- DEVLINK_PARAM_TYPE_BOOL,
+ DEVLINK_PARAM_TYPE_U8 = DEVLINK_VAR_ATTR_TYPE_U8,
+ DEVLINK_PARAM_TYPE_U16 = DEVLINK_VAR_ATTR_TYPE_U16,
+ DEVLINK_PARAM_TYPE_U32 = DEVLINK_VAR_ATTR_TYPE_U32,
+ DEVLINK_PARAM_TYPE_STRING = DEVLINK_VAR_ATTR_TYPE_STRING,
+ DEVLINK_PARAM_TYPE_BOOL = DEVLINK_VAR_ATTR_TYPE_FLAG,
};
union devlink_param_value {
@@ -1261,6 +1262,18 @@ enum devlink_trap_group_generic_id {
.min_burst = _min_burst, \
}
+#define devlink_fmsg_put(fmsg, name, value) ( \
+ _Generic((value), \
+ bool : devlink_fmsg_bool_pair_put, \
+ u8 : devlink_fmsg_u8_pair_put, \
+ u16 : devlink_fmsg_u32_pair_put, \
+ u32 : devlink_fmsg_u32_pair_put, \
+ u64 : devlink_fmsg_u64_pair_put, \
+ int : devlink_fmsg_u32_pair_put, \
+ char * : devlink_fmsg_string_pair_put, \
+ const char * : devlink_fmsg_string_pair_put) \
+ (fmsg, name, (value)))
+
enum {
/* device supports reload operations */
DEVLINK_F_RELOAD = 1UL << 0,
@@ -1522,6 +1535,7 @@ int devl_trylock(struct devlink *devlink);
void devl_unlock(struct devlink *devlink);
void devl_assert_locked(struct devlink *devlink);
bool devl_lock_is_held(struct devlink *devlink);
+DEFINE_GUARD(devl, struct devlink *, devl_lock(_T), devl_unlock(_T));
struct ib_device;
@@ -1994,6 +2008,7 @@ int devlink_compat_switch_id_get(struct net_device *dev,
int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port);
size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port);
+void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb);
#else
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 6c5a1ea209a2..bcf9d7467e1a 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -6,9 +6,13 @@
#define DEFINE_DROP_REASON(FN, FNe) \
FN(NOT_SPECIFIED) \
FN(NO_SOCKET) \
+ FN(SOCKET_CLOSE) \
+ FN(SOCKET_FILTER) \
+ FN(SOCKET_RCVBUFF) \
+ FN(UNIX_DISCONNECT) \
+ FN(UNIX_SKIP_OOB) \
FN(PKT_TOO_SMALL) \
FN(TCP_CSUM) \
- FN(SOCKET_FILTER) \
FN(UDP_CSUM) \
FN(NETFILTER_DROP) \
FN(OTHERHOST) \
@@ -18,7 +22,6 @@
FN(UNICAST_IN_L2_MULTICAST) \
FN(XFRM_POLICY) \
FN(IP_NOPROTO) \
- FN(SOCKET_RCVBUFF) \
FN(PROTO_MEM) \
FN(TCP_AUTH_HDR) \
FN(TCP_MD5NOTFOUND) \
@@ -36,6 +39,10 @@
FN(TCP_OVERWINDOW) \
FN(TCP_OFOMERGE) \
FN(TCP_RFC7323_PAWS) \
+ FN(TCP_RFC7323_PAWS_ACK) \
+ FN(TCP_RFC7323_TW_PAWS) \
+ FN(TCP_RFC7323_TSECR) \
+ FN(TCP_LISTEN_OVERFLOW) \
FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
FN(TCP_INVALID_ACK_SEQUENCE) \
@@ -55,9 +62,16 @@
FN(NEIGH_FAILED) \
FN(NEIGH_QUEUEFULL) \
FN(NEIGH_DEAD) \
+ FN(NEIGH_HH_FILLFAIL) \
FN(TC_EGRESS) \
FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
+ FN(QDISC_OVERLIMIT) \
+ FN(QDISC_CONGESTED) \
+ FN(CAKE_FLOOD) \
+ FN(FQ_BAND_LIMIT) \
+ FN(FQ_HORIZON_LIMIT) \
+ FN(FQ_FLOW_LIMIT) \
FN(CPU_BACKLOG) \
FN(XDP) \
FN(TC_INGRESS) \
@@ -100,11 +114,13 @@
FN(VXLAN_VNI_NOT_FOUND) \
FN(MAC_INVALID_SOURCE) \
FN(VXLAN_ENTRY_EXISTS) \
- FN(VXLAN_NO_REMOTE) \
+ FN(NO_TX_TARGET) \
FN(IP_TUNNEL_ECN) \
FN(TUNNEL_TXINFO) \
FN(LOCAL_MAC) \
FN(ARP_PVLAN_DISABLE) \
+ FN(MAC_IEEE_MAC_CONTROL) \
+ FN(BRIDGE_INGRESS_STP_STATE) \
FNe(MAX)
/**
@@ -129,12 +145,27 @@ enum skb_drop_reason {
* 3) no valid child socket during 3WHS process
*/
SKB_DROP_REASON_NO_SOCKET,
+ /** @SKB_DROP_REASON_SOCKET_CLOSE: socket is close()d */
+ SKB_DROP_REASON_SOCKET_CLOSE,
+ /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
+ SKB_DROP_REASON_SOCKET_FILTER,
+ /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
+ SKB_DROP_REASON_SOCKET_RCVBUFF,
+ /**
+ * @SKB_DROP_REASON_UNIX_DISCONNECT: recv queue is purged when SOCK_DGRAM
+ * or SOCK_SEQPACKET socket re-connect()s to another socket or notices
+ * during send() that the peer has been close()d.
+ */
+ SKB_DROP_REASON_UNIX_DISCONNECT,
+ /**
+ * @SKB_DROP_REASON_UNIX_SKIP_OOB: Out-Of-Band data is skipped by
+ * recv() without MSG_OOB so dropped.
+ */
+ SKB_DROP_REASON_UNIX_SKIP_OOB,
/** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
SKB_DROP_REASON_PKT_TOO_SMALL,
/** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */
SKB_DROP_REASON_TCP_CSUM,
- /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
- SKB_DROP_REASON_SOCKET_FILTER,
/** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */
SKB_DROP_REASON_UDP_CSUM,
/** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */
@@ -165,8 +196,6 @@ enum skb_drop_reason {
SKB_DROP_REASON_XFRM_POLICY,
/** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */
SKB_DROP_REASON_IP_NOPROTO,
- /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
- SKB_DROP_REASON_SOCKET_RCVBUFF,
/**
* @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as
* udp packet drop out of udp_memory_allocated.
@@ -251,6 +280,24 @@ enum skb_drop_reason {
* LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK: PAWS check, old ACK packet.
+ * Corresponds to LINUX_MIB_PAWS_OLD_ACK.
+ */
+ SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_TW_PAWS: PAWS check, socket is in
+ * TIME_WAIT state.
+ * Corresponds to LINUX_MIB_PAWS_TW_REJECTED.
+ */
+ SKB_DROP_REASON_TCP_RFC7323_TW_PAWS,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_TSECR: PAWS check, invalid TSEcr.
+ * Corresponds to LINUX_MIB_TSECRREJECTED.
+ */
+ SKB_DROP_REASON_TCP_RFC7323_TSECR,
+ /** @SKB_DROP_REASON_TCP_LISTEN_OVERFLOW: listener queue full. */
+ SKB_DROP_REASON_TCP_LISTEN_OVERFLOW,
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
@@ -302,6 +349,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_NEIGH_QUEUEFULL,
/** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */
SKB_DROP_REASON_NEIGH_DEAD,
+ /** @SKB_DROP_REASON_NEIGH_HH_FILLFAIL: failed to fill the device hard header */
+ SKB_DROP_REASON_NEIGH_HH_FILLFAIL,
/** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
SKB_DROP_REASON_TC_EGRESS,
/** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
@@ -312,6 +361,36 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_QDISC_DROP,
/**
+ * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
+ * instance exceeds its total buffer size limit.
+ */
+ SKB_DROP_REASON_QDISC_OVERLIMIT,
+ /**
+ * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm
+ * due to congestion.
+ */
+ SKB_DROP_REASON_QDISC_CONGESTED,
+ /**
+ * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of
+ * CAKE qdisc AQM algorithm (BLUE).
+ */
+ SKB_DROP_REASON_CAKE_FLOOD,
+ /**
+ * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band
+ * limit is reached.
+ */
+ SKB_DROP_REASON_FQ_BAND_LIMIT,
+ /**
+ * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet
+ * timestamp is too far in the future.
+ */
+ SKB_DROP_REASON_FQ_HORIZON_LIMIT,
+ /**
+ * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow
+ * exceeds its limits.
+ */
+ SKB_DROP_REASON_FQ_FLOW_LIMIT,
+ /**
* @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
* backlog queue. This can be caused by backlog queue full (see
* netdev_max_backlog in net.rst) or RPS flow limit
@@ -461,8 +540,8 @@ enum skb_drop_reason {
* entry or an entry pointing to a nexthop.
*/
SKB_DROP_REASON_VXLAN_ENTRY_EXISTS,
- /** @SKB_DROP_REASON_VXLAN_NO_REMOTE: no remote found for xmit */
- SKB_DROP_REASON_VXLAN_NO_REMOTE,
+ /** @SKB_DROP_REASON_NO_TX_TARGET: no target found for xmit */
+ SKB_DROP_REASON_NO_TX_TARGET,
/**
* @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to
* RFC 6040 4.2, see __INET_ECN_decapsulate() for detail.
@@ -485,6 +564,16 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_ARP_PVLAN_DISABLE,
/**
+ * @SKB_DROP_REASON_MAC_IEEE_MAC_CONTROL: the destination MAC address
+ * is an IEEE MAC Control address.
+ */
+ SKB_DROP_REASON_MAC_IEEE_MAC_CONTROL,
+ /**
+ * @SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE: the STP state of the
+ * ingress bridge port does not allow frames to be forwarded.
+ */
+ SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE,
+ /**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
*/
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 56cb7be92244..7d3b1a2a6fec 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -18,12 +18,6 @@ enum skb_drop_reason_subsys {
SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE,
/**
- * @SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR: mac80211 drop reasons
- * for frames still going to monitor, see net/mac80211/drop.h
- */
- SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR,
-
- /**
* @SKB_DROP_REASON_SUBSYS_OPENVSWITCH: openvswitch drop reasons,
* see net/openvswitch/drop.h
*/
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 72ae65e7246a..55e2d97f247e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -906,6 +906,8 @@ struct dsa_switch_ops {
void (*get_rmon_stats)(struct dsa_switch *ds, int port,
struct ethtool_rmon_stats *rmon_stats,
const struct ethtool_rmon_hist_range **ranges);
+ void (*get_ts_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_ts_stats *ts_stats);
void (*get_stats64)(struct dsa_switch *ds, int port,
struct rtnl_link_stats64 *s);
void (*get_pause_stats)(struct dsa_switch *ds, int port,
@@ -988,10 +990,9 @@ struct dsa_switch_ops {
/*
* Port's MAC EEE settings
*/
+ bool (*support_eee)(struct dsa_switch *ds, int port);
int (*set_mac_eee)(struct dsa_switch *ds, int port,
struct ethtool_keee *e);
- int (*get_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_keee *e);
/* EEPROM access */
int (*get_eeprom_len)(struct dsa_switch *ds);
@@ -1130,9 +1131,10 @@ struct dsa_switch_ops {
* PTP functionality
*/
int (*port_hwtstamp_get)(struct dsa_switch *ds, int port,
- struct ifreq *ifr);
+ struct kernel_hwtstamp_config *config);
int (*port_hwtstamp_set)(struct dsa_switch *ds, int port,
- struct ifreq *ifr);
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
void (*port_txtstamp)(struct dsa_switch *ds, int port,
struct sk_buff *skb);
bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
@@ -1383,5 +1385,6 @@ static inline bool dsa_user_dev_check(const struct net_device *dev)
netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
+bool dsa_supports_eee(struct dsa_switch *ds, int port);
#endif
diff --git a/include/net/dst.h b/include/net/dst.h
index 08647c99d79c..32dafbab4cd0 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -307,7 +307,7 @@ static inline bool dst_hold_safe(struct dst_entry *dst)
* @skb: buffer
*
* If dst is not yet refcounted and not destroyed, grab a ref on it.
- * Returns true if dst is refcounted.
+ * Returns: true if dst is refcounted.
*/
static inline bool skb_dst_force(struct sk_buff *skb)
{
@@ -456,7 +456,7 @@ INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
/* Output packet to network from transport. */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return INDIRECT_CALL_INET(skb_dst(skb)->output,
+ return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->output),
ip6_output, ip_output,
net, sk, skb);
}
@@ -466,7 +466,7 @@ INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
/* Input packet from network to transport. */
static inline int dst_input(struct sk_buff *skb)
{
- return INDIRECT_CALL_INET(skb_dst(skb)->input,
+ return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->input),
ip6_input, ip_local_deliver, skb);
}
@@ -561,6 +561,26 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
+static inline struct net_device *dst_dev(const struct dst_entry *dst)
+{
+ return READ_ONCE(dst->dev);
+}
+
+static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
+{
+ return dst_dev(skb_dst(skb));
+}
+
+static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
+{
+ return dev_net(skb_dst_dev(skb));
+}
+
+static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
+{
+ return dev_net_rcu(skb_dst_dev(skb));
+}
+
struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu, bool confirm_neigh);
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 84c15402931c..4160731dcb6e 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -163,11 +163,8 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
if (!new_md)
return ERR_PTR(-ENOMEM);
- unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
- sizeof(struct ip_tunnel_info) + md_size,
- /* metadata_dst_alloc() reserves room (md_size bytes) for
- * options right after the ip_tunnel_info struct.
- */);
+ memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+ sizeof(struct ip_tunnel_info) + md_size);
#ifdef CONFIG_DST_CACHE
/* Unclone the dst cache if there is one */
if (new_md->u.tun_info.dst_cache.cache) {
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 04383d90a1e3..6e68e359ad18 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -43,6 +43,10 @@ struct fib_rule {
struct fib_kuid_range uid_range;
struct fib_rule_port_range sport_range;
struct fib_rule_port_range dport_range;
+ u16 sport_mask;
+ u16 dport_mask;
+ u8 iif_is_l3_master;
+ u8 oif_is_l3_master;
struct rcu_head rcu;
};
@@ -146,6 +150,17 @@ static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
ntohs(port) <= a->end;
}
+static inline bool fib_rule_port_match(const struct fib_rule_port_range *range,
+ u16 port_mask, __be16 port)
+{
+ if ((range->start ^ ntohs(port)) & port_mask)
+ return false;
+ if (!port_mask && fib_rule_port_range_set(range) &&
+ !fib_rule_port_inrange(range, port))
+ return false;
+ return true;
+}
+
static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
{
return a->start != 0 && a->end != 0 && a->end < 0xffff &&
@@ -159,6 +174,12 @@ static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
a->end == b->end;
}
+static inline bool
+fib_rule_port_is_range(const struct fib_rule_port_range *range)
+{
+ return range->start != range->end;
+}
+
static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
{
return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto ||
@@ -178,10 +199,10 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
struct netlink_ext_ack *extack);
unsigned int fib_rules_seq_read(const struct net *net, int family);
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack);
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack);
+int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held);
+int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held);
INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule,
struct flowi *fl, int flags));
diff --git a/include/net/flow.h b/include/net/flow.h
index 335bbc52171c..a1839c278d87 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -38,6 +38,8 @@ struct flowi_common {
__u8 flowic_flags;
#define FLOWI_FLAG_ANYSRC 0x01
#define FLOWI_FLAG_KNOWN_NH 0x02
+#define FLOWI_FLAG_L3MDEV_OIF 0x04
+#define FLOWI_FLAG_ANY_SPORT 0x08
__u32 flowic_secid;
kuid_t flowic_uid;
__u32 flowic_multipath_hash;
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index d096cc6352de..a03d56765832 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -354,7 +354,7 @@ __genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags)
* such requests) or a struct initialized by genl_info_init_ntf()
* when constructing notifications.
*
- * Returns pointer to new genetlink header.
+ * Returns: pointer to new genetlink header.
*/
static inline void *
genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
@@ -366,7 +366,7 @@ genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
* genlmsg_nlhdr - Obtain netlink header from user specified header
* @user_hdr: user header as returned from genlmsg_put()
*
- * Returns pointer to netlink header.
+ * Returns: pointer to netlink header.
*/
static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr)
{
@@ -435,7 +435,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb,
* @flags: netlink message flags
* @cmd: generic netlink command
*
- * Returns pointer to user specific header
+ * Returns: pointer to user specific header
*/
static inline void *genlmsg_put_reply(struct sk_buff *skb,
struct genl_info *info,
diff --git a/include/net/gro.h b/include/net/gro.h
index 7b548f91754b..22d3a69e4404 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -509,28 +509,46 @@ static inline int gro_receive_network_flush(const void *th, const void *th2,
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
+void __gro_flush(struct gro_node *gro, bool flush_old);
+
+static inline void gro_flush(struct gro_node *gro, bool flush_old)
+{
+ if (!gro->bitmask)
+ return;
+
+ __gro_flush(gro, flush_old);
+}
+
+static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+{
+ gro_flush(&napi->gro, flush_old);
+}
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static inline void gro_normal_list(struct napi_struct *napi)
+static inline void gro_normal_list(struct gro_node *gro)
{
- if (!napi->rx_count)
+ if (!gro->rx_count)
return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
+ netif_receive_skb_list_internal(&gro->rx_list);
+ INIT_LIST_HEAD(&gro->rx_list);
+ gro->rx_count = 0;
}
/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
* pass the whole batch up to the stack.
*/
-static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
+static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb,
+ int segs)
{
- list_add_tail(&skb->list, &napi->rx_list);
- napi->rx_count += segs;
- if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
- gro_normal_list(napi);
+ list_add_tail(&skb->list, &gro->rx_list);
+ gro->rx_count += segs;
+ if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
+ gro_normal_list(gro);
}
+void gro_init(struct gro_node *gro);
+void gro_cleanup(struct gro_node *gro);
+
/* This function is the alternative of 'inet_iif' and 'inet_sdif'
* functions in case we can not rely on fields of IPCB.
*
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 30e9570beb2a..fda94b2647ff 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -23,7 +23,6 @@ struct net_hotdata {
struct net_offload udpv6_offload;
#endif
struct list_head offload_base;
- struct list_head ptype_all;
struct kmem_cache *skbuff_cache;
struct kmem_cache *skbuff_fclone_cache;
struct kmem_cache *skb_small_head_cache;
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 025bd8d3c769..745891d2e113 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -21,8 +21,6 @@ struct sockaddr;
struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6,
const struct request_sock *req, u8 proto);
-void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-
int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 74dd90ff5f12..c32878c69179 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -150,7 +150,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
int iif, int sdif,
bool *refcounted)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = dev_net_rcu(skb_dst(skb)->dev);
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct sock *sk;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c7f42844c79a..1735db332aab 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -44,12 +44,10 @@ struct inet_connection_sock_af_ops {
struct request_sock *req_unhash,
bool *own_req);
u16 net_header_len;
- u16 sockaddr_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
- void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
void (*mtu_reduced)(struct sock *sk);
};
@@ -58,7 +56,6 @@ struct inet_connection_sock_af_ops {
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_bind2_hash: Bind node in the bhash2 table
- * @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
* @icsk_pmtu_cookie Last pmtu seen by socket
@@ -66,7 +63,6 @@ struct inet_connection_sock_af_ops {
* @icsk_af_ops Operations which are AF_INET{4,6} specific
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
- * @icsk_clean_acked Clean acked data hook
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
@@ -85,18 +81,17 @@ struct inet_connection_sock {
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
struct inet_bind2_bucket *icsk_bind2_hash;
- unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
__u32 icsk_rto_min;
+ u32 icsk_rto_max;
__u32 icsk_delack_max;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void __rcu *icsk_ulp_data;
- void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:5,
icsk_ca_initialized:1,
@@ -116,8 +111,8 @@ struct inet_connection_sock {
#define ATO_BITS 8
__u32 ato:ATO_BITS, /* Predicted tick of soft clock */
lrcv_flowlabel:20, /* last received ipv6 flowlabel */
- unused:4;
- unsigned long timeout; /* Currently scheduled timeout */
+ dst_quick_ack:1, /* cache dst RTAX_QUICKACK */
+ unused:3;
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
__u16 rcv_mss; /* MSS used for delayed ACK decisions */
@@ -189,8 +184,17 @@ static inline void inet_csk_delack_init(struct sock *sk)
memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
}
-void inet_csk_delete_keepalive_timer(struct sock *sk);
-void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
+static inline unsigned long
+icsk_timeout(const struct inet_connection_sock *icsk)
+{
+ return READ_ONCE(icsk->icsk_retransmit_timer.expires);
+}
+
+static inline unsigned long
+icsk_delack_timeout(const struct inet_connection_sock *icsk)
+{
+ return READ_ONCE(icsk->icsk_delack_timer.expires);
+}
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
@@ -227,16 +231,15 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
when = max_when;
}
+ when += jiffies;
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
smp_store_release(&icsk->icsk_pending, what);
- icsk->icsk_timeout = jiffies + when;
- sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
+ sk_reset_timer(sk, &icsk->icsk_retransmit_timer, when);
} else if (what == ICSK_TIME_DACK) {
smp_store_release(&icsk->icsk_ack.pending,
icsk->icsk_ack.pending | ICSK_ACK_TIMER);
- icsk->icsk_ack.timeout = jiffies + when;
- sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
+ sk_reset_timer(sk, &icsk->icsk_delack_timer, when);
} else {
pr_debug("inet_csk BUG: unknown timer value\n");
}
@@ -318,8 +321,6 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
int inet_csk_listen_start(struct sock *sk);
void inet_csk_listen_stop(struct sock *sk);
-void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-
/* update the fast reuse flag when adding a socket */
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct sock *sk);
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 5af6eb14c5db..0eccd9c3a883 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -137,7 +137,7 @@ static inline void fqdir_pre_exit(struct fqdir *fqdir)
}
void fqdir_exit(struct fqdir *fqdir);
-void inet_frag_kill(struct inet_frag_queue *q);
+void inet_frag_kill(struct inet_frag_queue *q, int *refs);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
@@ -145,9 +145,9 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
unsigned int inet_frag_rbtree_purge(struct rb_root *root,
enum skb_drop_reason reason);
-static inline void inet_frag_put(struct inet_frag_queue *q)
+static inline void inet_frag_putn(struct inet_frag_queue *q, int refs)
{
- if (refcount_dec_and_test(&q->refcnt))
+ if (refs && refcount_sub_and_test(refs, &q->refcnt))
inet_frag_destroy(q);
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5eea47f135a4..4564b5d348b1 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -89,6 +89,7 @@ struct inet_bind_bucket {
bool fast_ipv6_only;
struct hlist_node node;
struct hlist_head bhash2;
+ struct rcu_head rcu;
};
struct inet_bind2_bucket {
@@ -174,14 +175,9 @@ struct inet_hashinfo {
bool pernet;
} ____cacheline_aligned_in_smp;
-static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
+static inline struct inet_hashinfo *tcp_get_hashinfo(const struct sock *sk)
{
-#if IS_ENABLED(CONFIG_IP_DCCP)
- return sk->sk_prot->h.hashinfo ? :
- sock_net(sk)->ipv4.tcp_death_row.hashinfo;
-#else
return sock_net(sk)->ipv4.tcp_death_row.hashinfo;
-#endif
}
static inline struct inet_listen_hashbucket *
@@ -226,8 +222,7 @@ struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
struct inet_bind_hashbucket *head,
const unsigned short snum, int l3mdev);
-void inet_bind_bucket_destroy(struct kmem_cache *cachep,
- struct inet_bind_bucket *tb);
+void inet_bind_bucket_destroy(struct inet_bind_bucket *tb);
bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
const struct net *net, unsigned short port,
@@ -492,7 +487,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
const int sdif,
bool *refcounted)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = dev_net_rcu(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
struct sock *sk;
@@ -527,9 +522,12 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u64 port_offset,
+ u32 hash_port0,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
- struct inet_timewait_sock **));
+ struct inet_timewait_sock **,
+ bool rcu_lookup,
+ u32 hash));
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 56d8bc5593d3..1086256549fa 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -19,6 +19,7 @@
#include <linux/netdevice.h>
#include <net/flow.h>
+#include <net/inet_dscp.h>
#include <net/sock.h>
#include <net/request_sock.h>
#include <net/netns/hash.h>
@@ -172,7 +173,7 @@ struct inet_cork {
u8 tx_flags;
__u8 ttl;
__s16 tos;
- char priority;
+ u32 priority;
__u16 gso_size;
u32 ts_opt_id;
u64 transmit_time;
@@ -302,6 +303,11 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL;
}
+static inline dscp_t inet_sk_dscp(const struct inet_sock *inet)
+{
+ return inet_dsfield_to_dscp(READ_ONCE(inet->tos));
+}
+
#define inet_test_bit(nr, sk) \
test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
#define inet_set_bit(nr, sk) \
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 62c0a7e65d6b..67a313575780 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -74,6 +74,10 @@ struct inet_timewait_sock {
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
+ /**
+ * @tw_reuse_stamp: Time of entry into %TCP_TIME_WAIT state in msec.
+ */
+ u32 tw_entry_stamp;
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
struct inet_bind2_bucket *tw_tb2;
diff --git a/include/net/ip.h b/include/net/ip.h
index 23ecb10945b0..47ed6d23853d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -81,7 +81,6 @@ struct ipcm_cookie {
__u8 protocol;
__u8 ttl;
__s16 tos;
- char priority;
__u16 gso_size;
};
@@ -93,10 +92,12 @@ static inline void ipcm_init(struct ipcm_cookie *ipcm)
static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
const struct inet_sock *inet)
{
- ipcm_init(ipcm);
+ *ipcm = (struct ipcm_cookie) {
+ .tos = READ_ONCE(inet->tos),
+ };
+
+ sockcm_init(&ipcm->sockc, &inet->sk);
- ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
- ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
ipcm->protocol = inet->inet_num;
@@ -257,13 +258,6 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
return RT_SCOPE_UNIVERSE;
}
-static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
-{
- u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos);
-
- return dsfield & INET_DSCP_MASK;
-}
-
/* datagram.c */
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
@@ -363,7 +357,7 @@ static inline void inet_get_local_port_range(const struct net *net, int *low, in
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
#ifdef CONFIG_SYSCTL
-static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
+static inline bool inet_is_local_reserved_port(const struct net *net, unsigned short port)
{
if (!net->ipv4.sysctl_local_reserved_ports)
return false;
@@ -692,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
#endif
+#if IS_MODULE(CONFIG_IPV6)
+#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
+#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
+#else
+#define EXPORT_IPV6_MOD(X)
+#define EXPORT_IPV6_MOD_GPL(X)
+#endif
+
static inline unsigned int ipv4_addr_hash(__be32 ip)
{
return (__force unsigned int) ip;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 7c87873ae211..88b0dd4d8e09 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -198,6 +198,7 @@ struct fib6_info {
fib6_destroying:1,
unused:4;
+ struct list_head purge_link;
struct rcu_head rcu;
struct nexthop *nh;
struct fib6_nh fib6_nh[];
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a113c11ab56b..48bb3cf41469 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -162,6 +162,8 @@ struct fib_info {
struct fib_nh fib_nh[] __counted_by(fib_nhs);
};
+int __net_init fib4_semantics_init(struct net *net);
+void __net_exit fib4_semantics_exit(struct net *net);
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rule;
@@ -572,7 +574,8 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net,
int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
struct netlink_ext_ack *extack);
-void fib_select_multipath(struct fib_result *res, int hash);
+void fib_select_multipath(struct fib_result *res, int hash,
+ const struct flowi4 *fl4);
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1aa31bdb2b31..0c3d571a04a1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -95,8 +95,8 @@ struct ip_tunnel_encap {
#define ip_tunnel_info_opts(info) \
_Generic(info, \
- const struct ip_tunnel_info * : ((const void *)((info) + 1)),\
- struct ip_tunnel_info * : ((void *)((info) + 1))\
+ const struct ip_tunnel_info * : ((const void *)(info)->options),\
+ struct ip_tunnel_info * : ((void *)(info)->options)\
)
struct ip_tunnel_info {
@@ -107,6 +107,7 @@ struct ip_tunnel_info {
#endif
u8 options_len;
u8 mode;
+ u8 options[] __aligned_largest __counted_by(options_len);
};
/* 6rd prefix/relay information */
@@ -376,10 +377,9 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev);
int ip_tunnel_get_iflink(const struct net_device *dev);
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
-
-void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
- struct rtnl_link_ops *ops,
- struct list_head *dev_to_kill);
+void ip_tunnel_delete_net(struct net *net, unsigned int id,
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
@@ -406,8 +406,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
bool log_ecn_error);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm_kern *p, __u32 fwmark);
-int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm_kern *p, __u32 fwmark);
+int ip_tunnel_newlink(struct net *net, struct net_device *dev,
+ struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
+ __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
@@ -650,7 +651,7 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
static inline void ip_tunnel_info_opts_get(void *to,
const struct ip_tunnel_info *info)
{
- memcpy(to, info + 1, info->options_len);
+ memcpy(to, ip_tunnel_info_opts(info), info->options_len);
}
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index ff406ef4fd4a..29a36709e7f3 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1163,6 +1163,14 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
return housekeeping_cpumask(HK_TYPE_KTHREAD);
}
+static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs)
+{
+ if (ipvs->est_cpulist_valid)
+ return ipvs->sysctl_est_cpulist;
+ else
+ return NULL;
+}
+
static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_est_nice;
@@ -1270,6 +1278,11 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
return housekeeping_cpumask(HK_TYPE_KTHREAD);
}
+static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs)
+{
+ return NULL;
+}
+
static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
{
return IPVS_EST_NICE;
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index 8660a2a6d1fc..51401f01e2a5 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -3,20 +3,9 @@
#define _NET_IPCOMP_H
#include <linux/skbuff.h>
-#include <linux/types.h>
-
-#define IPCOMP_SCRATCH_SIZE 65400
-
-struct crypto_comp;
-struct ip_comp_hdr;
-
-struct ipcomp_data {
- u16 threshold;
- struct crypto_comp * __percpu *tfms;
-};
struct ip_comp_hdr;
-struct sk_buff;
+struct netlink_ext_ack;
struct xfrm_state;
int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 248bfb26e2af..2ccdf85f34f1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -246,17 +246,20 @@ extern int sysctl_mld_qrv;
#define _DEVADD(net, statname, mod, idev, field, val) \
({ \
struct inet6_dev *_idev = (idev); \
+ unsigned long _field = (field); \
+ unsigned long _val = (val); \
if (likely(_idev != NULL)) \
- mod##SNMP_ADD_STATS((_idev)->stats.statname, (field), (val)); \
- mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, (field), (val));\
+ mod##SNMP_ADD_STATS((_idev)->stats.statname, _field, _val); \
+ mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, _field, _val);\
})
#define _DEVUPD(net, statname, mod, idev, field, val) \
({ \
struct inet6_dev *_idev = (idev); \
+ unsigned long _val = (val); \
if (likely(_idev != NULL)) \
- mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, (val)); \
- mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, (val));\
+ mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, _val); \
+ mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, _val);\
})
/* MIBs */
@@ -363,15 +366,6 @@ struct ipcm6_cookie {
struct ipv6_txoptions *opt;
};
-static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
-{
- *ipc6 = (struct ipcm6_cookie) {
- .hlimit = -1,
- .tclass = -1,
- .dontfrag = -1,
- };
-}
-
static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
const struct sock *sk)
{
@@ -380,6 +374,8 @@ static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
.tclass = inet6_sk(sk)->tclass,
.dontfrag = inet6_test_bit(DONTFRAG, sk),
};
+
+ sockcm_init(&ipc6->sockc, sk);
}
static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
@@ -471,7 +467,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
/* This helper is specialized for BIG TCP needs.
* It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
* It assumes headers are already in skb->head.
- * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there.
+ * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there.
*/
static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
{
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 7321ffe3a108..38ef66826939 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -66,6 +66,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
struct sk_buff *head;
+ int refs = 1;
rcu_read_lock();
/* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
@@ -77,7 +78,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
goto out;
fq->q.flags |= INET_FRAG_DROP;
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, &refs);
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
@@ -109,7 +110,7 @@ out:
spin_unlock(&fq->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- inet_frag_put(&fq->q);
+ inet_frag_putn(&fq->q, refs);
}
/* Check if the upper layer header is truncated in the first fragment. */
diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
index dd9e93c12260..9804fa5d9c67 100644
--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -202,7 +202,7 @@ struct iucv_handler {
*
* Registers a driver with IUCV.
*
- * Returns 0 on success, -ENOMEM if the memory allocation for the pathid
+ * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid
* table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus.
*/
int iucv_register(struct iucv_handler *handler, int smp);
@@ -224,7 +224,7 @@ void iucv_unregister(struct iucv_handler *handle, int smp);
*
* Allocate a new path structure for use with iucv_connect.
*
- * Returns NULL if the memory allocation failed or a pointer to the
+ * Returns: NULL if the memory allocation failed or a pointer to the
* path structure.
*/
static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp)
@@ -260,7 +260,7 @@ static inline void iucv_path_free(struct iucv_path *path)
* This function is issued after the user received a connection pending
* external interrupt and now wishes to complete the IUCV communication path.
*
- * Returns the result of the CP IUCV call.
+ * Returns: the result of the CP IUCV call.
*/
int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
u8 *userdata, void *private);
@@ -278,7 +278,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
* successfully, you are not able to use the path until you receive an IUCV
* Connection Complete external interrupt.
*
- * Returns the result of the CP IUCV call.
+ * Returns: the result of the CP IUCV call.
*/
int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
u8 *userid, u8 *system, u8 *userdata,
@@ -292,7 +292,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
* This function temporarily suspends incoming messages on an IUCV path.
* You can later reactivate the path by invoking the iucv_resume function.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_quiesce(struct iucv_path *path, u8 *userdata);
@@ -304,7 +304,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata);
* This function resumes incoming messages on an IUCV path that has
* been stopped with iucv_path_quiesce.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_resume(struct iucv_path *path, u8 *userdata);
@@ -315,7 +315,7 @@ int iucv_path_resume(struct iucv_path *path, u8 *userdata);
*
* This function terminates an IUCV path.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_sever(struct iucv_path *path, u8 *userdata);
@@ -327,7 +327,7 @@ int iucv_path_sever(struct iucv_path *path, u8 *userdata);
*
* Cancels a message you have sent.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
u32 srccls);
@@ -347,7 +347,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: local_bh_enable/local_bh_disable
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size, size_t *residual);
@@ -367,7 +367,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: no locking.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size,
@@ -382,7 +382,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
* are notified of a message and the time that you complete the message,
* the message may be rejected.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
@@ -399,7 +399,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
* pathid, msgid, and trgcls. Prmmsg signifies the data is moved into
* the parameter list.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *reply, size_t size);
@@ -419,7 +419,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: local_bh_enable/local_bh_disable
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
@@ -439,7 +439,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: no locking.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
@@ -461,7 +461,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
* reply to the message and a buffer is provided into which IUCV moves
* the reply to this message.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size,
diff --git a/include/net/kcm.h b/include/net/kcm.h
index 441e993be634..d9c35e71ecea 100644
--- a/include/net/kcm.h
+++ b/include/net/kcm.h
@@ -71,7 +71,6 @@ struct kcm_sock {
struct list_head wait_psock_list;
struct sk_buff *seq_skb;
struct mutex tx_mutex;
- u32 tx_stopped : 1;
/* Don't use bit fields here, these are set under different locks */
bool tx_wait;
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index f7fe796e8429..1eb8dad18f7e 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg);
+static inline
+bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
+{
+ return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) &&
+ fl->flowi_l3mdev == iifindex;
+}
+
+static inline
+bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
+{
+ return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF &&
+ fl->flowi_l3mdev == oifindex;
+}
+
void l3mdev_update_flow(struct net *net, struct flowi *fl);
int l3mdev_master_ifindex_rcu(const struct net_device *dev);
@@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
{
return 1;
}
+
+static inline
+bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
+{
+ return false;
+}
+
+static inline
+bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
+{
+ return false;
+}
+
static inline
void l3mdev_update_flow(struct net *net, struct flowi *fl)
{
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
index 43574bd6612f..ab05024be518 100644
--- a/include/net/libeth/rx.h
+++ b/include/net/libeth/rx.h
@@ -198,6 +198,53 @@ struct libeth_rx_pt {
enum xdp_rss_hash_type hash_type:16;
};
+/**
+ * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
+ * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
+ * @ipe: IP checksum error
+ * @eipe: external (outermost) IP header (only for tunels)
+ * @eudpe: external (outermost) UDP checksum error (only for tunels)
+ * @ipv6exadd: IPv6 header with extension headers
+ * @l4e: L4 integrity error
+ * @pprs: set for packets that skip checksum calculation in the HW pre parser
+ * @nat: the packet is a UDP tunneled packet
+ * @raw_csum_valid: set if raw checksum is valid
+ * @pad: padding to naturally align raw_csum field
+ * @raw_csum: raw checksum
+ */
+struct libeth_rx_csum {
+ u32 l3l4p:1;
+ u32 ipe:1;
+ u32 eipe:1;
+ u32 eudpe:1;
+ u32 ipv6exadd:1;
+ u32 l4e:1;
+ u32 pprs:1;
+ u32 nat:1;
+
+ u32 raw_csum_valid:1;
+ u32 pad:7;
+ u32 raw_csum:16;
+};
+
+/**
+ * struct libeth_rqe_info - receive queue element info
+ * @len: packet length
+ * @ptype: packet type based on types programmed into the device
+ * @eop: whether it's the last fragment of the packet
+ * @rxe: MAC errors: CRC, Alignment, Oversize, Undersizes, Length error
+ * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
+ */
+struct libeth_rqe_info {
+ u32 len;
+
+ u32 ptype:14;
+ u32 eop:1;
+ u32 rxe:1;
+
+ u32 vlan:16;
+};
+
void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
/**
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 53bd2d02a4f0..26232f603e33 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -138,12 +138,12 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
static inline void lwtunnel_set_redirect(struct dst_entry *dst)
{
if (lwtunnel_output_redirect(dst->lwtstate)) {
- dst->lwtstate->orig_output = dst->output;
- dst->output = lwtunnel_output;
+ dst->lwtstate->orig_output = READ_ONCE(dst->output);
+ WRITE_ONCE(dst->output, lwtunnel_output);
}
if (lwtunnel_input_redirect(dst->lwtstate)) {
- dst->lwtstate->orig_input = dst->input;
- dst->input = lwtunnel_input;
+ dst->lwtstate->orig_input = READ_ONCE(dst->input);
+ WRITE_ONCE(dst->input, lwtunnel_input);
}
}
#else
@@ -206,6 +206,7 @@ static inline int lwtunnel_valid_encap_type(u16 encap_type,
NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel");
return -EOPNOTSUPP;
}
+
static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
struct netlink_ext_ack *extack)
{
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ab8dce1f2c27..2e9d1c0bf5ac 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2024 Intel Corporation
+ * Copyright (C) 2018 - 2025 Intel Corporation
*/
#ifndef MAC80211_H
@@ -682,6 +682,9 @@ struct ieee80211_parsed_tpe {
* responder functionality.
* @ftmr_params: configurable lci/civic parameter when enabling FTM responder.
* @nontransmitted: this BSS is a nontransmitted BSS profile
+ * @tx_bss_conf: Pointer to the BSS configuration of transmitting interface
+ * if MBSSID is enabled. This pointer is RCU-protected due to CSA finish
+ * and BSS color change flows accessing it.
* @transmitter_bssid: the address of transmitter AP
* @bssid_index: index inside the multiple BSSID set
* @bssid_indicator: 2^bssid_indicator is the maximum number of APs in set
@@ -702,6 +705,7 @@ struct ieee80211_parsed_tpe {
* @tpe: transmit power envelope information
* @pwr_reduction: power constraint of BSS.
* @eht_support: does this BSS support EHT
+ * @epcs_support: does this BSS support EPCS
* @csa_active: marks whether a channel switch is going on.
* @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @chanctx_conf: The channel context this interface is assigned to, or %NULL
@@ -740,6 +744,7 @@ struct ieee80211_parsed_tpe {
* @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the
* reception of an EHT TB PPDU on an RU that spans the entire PPDU
* bandwidth
+ * @eht_disable_mcs15: disable EHT-MCS 15 reception capability.
* @bss_param_ch_cnt: in BSS-mode, the BSS params change count. This
* information is the latest known value. It can come from this link's
* beacon or from a beacon sent by another link.
@@ -803,6 +808,7 @@ struct ieee80211_bss_conf {
struct ieee80211_ftm_responder_params *ftmr_params;
/* Multiple BSSID data */
bool nontransmitted;
+ struct ieee80211_bss_conf __rcu *tx_bss_conf;
u8 transmitter_bssid[ETH_ALEN];
u8 bssid_index;
u8 bssid_indicator;
@@ -823,7 +829,7 @@ struct ieee80211_bss_conf {
u8 pwr_reduction;
bool eht_support;
-
+ bool epcs_support;
bool csa_active;
bool mu_mimo_owner;
@@ -847,6 +853,8 @@ struct ieee80211_bss_conf {
bool eht_su_beamformee;
bool eht_mu_beamformer;
bool eht_80mhz_full_bw_ul_mumimo;
+ bool eht_disable_mcs15;
+
u8 bss_param_ch_cnt;
u8 bss_param_ch_cnt_link_id;
};
@@ -1855,6 +1863,9 @@ struct ieee80211_channel_switch {
* operation on this interface and request a channel context without
* the AP definition. Use this e.g. because the device is able to
* handle OFDMA (downlink and trigger for uplink) on a per-AP basis.
+ * @IEEE80211_VIF_REMOVE_AP_AFTER_DISASSOC: indicates that the AP sta should
+ * be removed only after setting the vif as unassociated, and not the
+ * opposite. Only relevant for STA vifs.
*/
enum ieee80211_vif_flags {
IEEE80211_VIF_BEACON_FILTER = BIT(0),
@@ -1863,6 +1874,7 @@ enum ieee80211_vif_flags {
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
IEEE80211_VIF_EML_ACTIVE = BIT(4),
IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW = BIT(5),
+ IEEE80211_VIF_REMOVE_AP_AFTER_DISASSOC = BIT(6),
};
@@ -2018,7 +2030,6 @@ enum ieee80211_neg_ttlm_res {
* @txq: the multicast data TX queue
* @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
* &enum ieee80211_offload_flags.
- * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled.
*/
struct ieee80211_vif {
enum nl80211_iftype type;
@@ -2047,8 +2058,6 @@ struct ieee80211_vif {
bool probe_req_reg;
bool rx_mcast_action_reg;
- struct ieee80211_vif *mbssid_tx_vif;
-
/* must be last */
u8 drv_priv[] __aligned(sizeof(void *));
};
@@ -2216,7 +2225,7 @@ enum ieee80211_key_flags {
* @tx_pn: PN used for TX keys, may be used by the driver as well if it
* needs to do software PN assignment by itself (e.g. due to TSO)
* @flags: key flags, see &enum ieee80211_key_flags.
- * @keyidx: the key index (0-3)
+ * @keyidx: the key index (0-7)
* @keylen: key material length
* @key: key material. For ALG_TKIP the key is encoded as a 256-bit (32 byte)
* data block:
@@ -2225,7 +2234,7 @@ enum ieee80211_key_flags {
* - Temporal Authenticator Rx MIC Key (64 bits)
* @icv_len: The ICV length for this key type
* @iv_len: The IV length for this key type
- * @link_id: the link ID for MLO, or -1 for non-MLO or pairwise keys
+ * @link_id: the link ID, 0 for non-MLO, or -1 for pairwise keys
*/
struct ieee80211_key_conf {
atomic64_t tx_pn;
@@ -2336,6 +2345,8 @@ enum ieee80211_sta_rx_bandwidth {
IEEE80211_STA_RX_BW_320,
};
+#define IEEE80211_STA_RX_BW_MAX IEEE80211_STA_RX_BW_320
+
/**
* struct ieee80211_sta_rates - station rate selection table
*
@@ -2481,6 +2492,7 @@ struct ieee80211_link_sta {
* @max_amsdu_subframes: indicates the maximal number of MSDUs in a single
* A-MSDU. Taken from the Extended Capabilities element. 0 means
* unlimited.
+ * @eml_cap: EML capabilities of this MLO station
* @cur: currently valid data as aggregated from the active links
* For non MLO STA it will point to the deflink data. For MLO STA
* ieee80211_sta_recalc_aggregates() must be called to update it.
@@ -2515,6 +2527,7 @@ struct ieee80211_sta {
bool mlo;
bool spp_amsdu;
u8 max_amsdu_subframes;
+ u16 eml_cap;
struct ieee80211_sta_aggregates *cur;
@@ -2845,6 +2858,11 @@ struct ieee80211_txq {
* implements MLO, so operation can continue on other links when one
* link is switching.
*
+ * @IEEE80211_HW_STRICT: strictly enforce certain things mandated by the spec
+ * but otherwise ignored/worked around for interoperability. This is a
+ * HW flag so drivers can opt in according to their own control, e.g. in
+ * testing.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2905,6 +2923,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_DISALLOW_PUNCTURING,
IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ,
IEEE80211_HW_HANDLES_QUIET_CSA,
+ IEEE80211_HW_STRICT,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -3817,7 +3836,7 @@ enum ieee80211_reconfig_type {
* @was_assoc: set if this call is due to deauth/disassoc
* while just having been associated
* @link_id: the link id on which the frame will be TX'ed.
- * Only used with the mgd_prepare_tx() method.
+ * 0 for a non-MLO connection.
*/
struct ieee80211_prep_tx_info {
u16 duration;
@@ -4283,6 +4302,8 @@ struct ieee80211_prep_tx_info {
* @mgd_complete_tx: Notify the driver that the response frame for a previously
* transmitted frame announced with @mgd_prepare_tx was received, the data
* is filled similarly to @mgd_prepare_tx though the duration is not used.
+ * Note that this isn't always called for each mgd_prepare_tx() call, for
+ * example for SAE the 'confirm' messages can be on the air in any order.
*
* @mgd_protect_tdls_discover: Protect a TDLS discovery session. After sending
* a TDLS discovery-request, we expect a reply to arrive on the AP's
@@ -4447,6 +4468,8 @@ struct ieee80211_prep_tx_info {
* new links bitmaps may be 0 if going from/to a non-MLO situation.
* The @old array contains pointers to the old bss_conf structures
* that were already removed, in case they're needed.
+ * Note that removal of link should always succeed, so the return value
+ * will be ignored in a removal only case.
* This callback can sleep.
* @change_sta_links: Change the valid links of a station, similar to
* @change_vif_links. This callback can sleep.
@@ -4762,7 +4785,7 @@ struct ieee80211_ops {
u32 (*get_expected_throughput)(struct ieee80211_hw *hw,
struct ieee80211_sta *sta);
int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
- int *dbm);
+ unsigned int link_id, int *dbm);
int (*tdls_channel_switch)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -5334,22 +5357,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
int max_rates);
/**
- * ieee80211_sta_set_expected_throughput - set the expected tpt for a station
- *
- * Call this function to notify mac80211 about a change in expected throughput
- * to a station. A driver for a device that does rate control in firmware can
- * call this function when the expected throughput estimate towards a station
- * changes. The information is used to tune the CoDel AQM applied to traffic
- * going towards that station (which can otherwise be too aggressive and cause
- * slow stations to starve).
- *
- * @pubsta: the station to set throughput for.
- * @thr: the current expected throughput in kbps.
- */
-void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
- u32 thr);
-
-/**
* ieee80211_tx_rate_update - transmit rate update callback
*
* Drivers should call this functions with a non-NULL pub sta
@@ -6659,6 +6666,31 @@ void ieee80211_iter_chan_contexts_atomic(
void *iter_data);
/**
+ * ieee80211_iter_chan_contexts_mtx - iterate channel contexts
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @iter: iterator function
+ * @iter_data: data passed to iterator function
+ *
+ * Iterate all active channel contexts. This function can only be used while
+ * holding the wiphy mutex.
+ *
+ * The iterator will not find a context that's being added (during
+ * the driver callback to add it) but will find it while it's being
+ * removed.
+ *
+ * Note that during hardware restart, all contexts that existed
+ * before the restart are considered already present so will be
+ * found while iterating, whether they've been re-added already
+ * or not.
+ */
+void ieee80211_iter_chan_contexts_mtx(
+ struct ieee80211_hw *hw,
+ void (*iter)(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ void *data),
+ void *iter_data);
+
+/**
* ieee80211_ap_probereq_get - retrieve a Probe Request template
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
@@ -7734,6 +7766,50 @@ ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
}
}
+/**
+ * ieee80211_prepare_rx_omi_bw - prepare for sending BW RX OMI
+ * @link_sta: the link STA the OMI is going to be sent to
+ * @bw: the bandwidth requested
+ *
+ * When the driver decides to do RX OMI to change bandwidth with a STA
+ * it calls this function to prepare, then sends the OMI, and finally
+ * calls ieee80211_finalize_rx_omi_bw().
+ *
+ * Note that the (link) STA rate control is updated accordingly as well,
+ * but the chanctx might not be updated if there are other users.
+ * If the intention is to reduce the listen bandwidth, the driver must
+ * ensure there are no TDLS stations nor other uses of the chanctx.
+ *
+ * Also note that in order to sequence correctly, narrowing bandwidth
+ * will only happen in ieee80211_finalize_rx_omi_bw(), whereas widening
+ * again (e.g. going back to normal) will happen here.
+ *
+ * Note that we treat this symmetrically, so if the driver calls this
+ * and tells the peer to only send with a lower bandwidth, we assume
+ * that the driver also wants to only send at that lower bandwidth, to
+ * allow narrowing of the chanctx request for this station/interface.
+ *
+ * Finally, the driver must ensure that if the function returned %true,
+ * ieee80211_finalize_rx_omi_bw() is also called, even for example in
+ * case of HW restart.
+ *
+ * Context: Must be called with wiphy mutex held, and will call back
+ * into the driver, so ensure no driver locks are held.
+ *
+ * Return: %true if changes are going to be made, %false otherwise
+ */
+bool ieee80211_prepare_rx_omi_bw(struct ieee80211_link_sta *link_sta,
+ enum ieee80211_sta_rx_bandwidth bw);
+
+/**
+ * ieee80211_finalize_rx_omi_bw - finalize BW RX OMI update
+ * @link_sta: the link STA the OMI was sent to
+ *
+ * See ieee80211_client_prepare_rx_omi_bw(). Context is the same here
+ * as well.
+ */
+void ieee80211_finalize_rx_omi_bw(struct ieee80211_link_sta *link_sta);
+
/* for older drivers - let's not document these ... */
int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx);
diff --git a/include/net/macsec.h b/include/net/macsec.h
index de216cbc6b05..bc7de5b53e54 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -38,8 +38,8 @@ struct metadata_dst;
typedef union salt {
struct {
- u32 ssci;
- u64 pn;
+ ssci_t ssci;
+ __be64 pn;
} __packed;
u8 bytes[MACSEC_SALT_LEN];
} __packed salt_t;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 90f56656b572..3ce56a816425 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -60,6 +60,7 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_DONE = 131,
GDMA_EQE_HWC_SOC_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
+ GDMA_EQE_HWC_SOC_SERVICE = 134,
GDMA_EQE_RNIC_QP_FATAL = 176,
};
@@ -70,6 +71,18 @@ enum {
GDMA_DEVICE_MANA_IB = 3,
};
+enum gdma_service_type {
+ GDMA_SERVICE_TYPE_NONE = 0,
+ GDMA_SERVICE_TYPE_RDMA_SUSPEND = 1,
+ GDMA_SERVICE_TYPE_RDMA_RESUME = 2,
+};
+
+struct mana_service_work {
+ struct work_struct work;
+ struct gdma_dev *gdma_dev;
+ enum gdma_service_type event;
+};
+
struct gdma_resource {
/* Protect the bitmap */
spinlock_t lock;
@@ -152,6 +165,7 @@ struct gdma_general_req {
#define GDMA_MESSAGE_V1 1
#define GDMA_MESSAGE_V2 2
#define GDMA_MESSAGE_V3 3
+#define GDMA_MESSAGE_V4 4
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
@@ -223,6 +237,8 @@ struct gdma_dev {
void *driver_data;
struct auxiliary_device *adev;
+ bool is_suspended;
+ bool rdma_teardown;
};
/* MANA_PAGE_SIZE is the DMA unit */
@@ -406,9 +422,11 @@ struct gdma_context {
/* Azure RDMA adapter */
struct gdma_dev mana_ib;
-};
-#define MAX_NUM_GDMA_DEVICES 4
+ u64 pf_cap_flags1;
+
+ struct workqueue_struct *service_wq;
+};
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
{
@@ -554,13 +572,18 @@ enum {
*/
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
+#define GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB BIT(4)
#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
+/* Driver can handle holes (zeros) in the device list */
+#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
- GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT)
+ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
+ GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
#define GDMA_DRV_CAP_FLAGS2 0
@@ -621,11 +644,12 @@ struct gdma_query_max_resources_resp {
}; /* HW DATA */
/* GDMA_LIST_DEVICES */
+#define GDMA_DEV_LIST_SIZE 64
struct gdma_list_devices_resp {
struct gdma_resp_hdr hdr;
u32 num_of_devs;
u32 reserved;
- struct gdma_dev_id devs[64];
+ struct gdma_dev_id devs[GDMA_DEV_LIST_SIZE];
}; /* HW DATA */
/* GDMA_REGISTER_DEVICE */
@@ -703,20 +727,6 @@ struct gdma_query_hwc_timeout_resp {
u32 reserved;
};
-enum atb_page_size {
- ATB_PAGE_SIZE_4K,
- ATB_PAGE_SIZE_8K,
- ATB_PAGE_SIZE_16K,
- ATB_PAGE_SIZE_32K,
- ATB_PAGE_SIZE_64K,
- ATB_PAGE_SIZE_128K,
- ATB_PAGE_SIZE_256K,
- ATB_PAGE_SIZE_512K,
- ATB_PAGE_SIZE_1M,
- ATB_PAGE_SIZE_2M,
- ATB_PAGE_SIZE_MAX,
-};
-
enum gdma_mr_access_flags {
GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0),
GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1),
@@ -775,6 +785,7 @@ struct gdma_destroy_dma_region_req {
enum gdma_pd_flags {
GDMA_PD_FLAG_INVALID = 0,
+ GDMA_PD_FLAG_ALLOW_GPA_MR = 1,
};
struct gdma_create_pd_req {
@@ -800,11 +811,18 @@ struct gdma_destory_pd_resp {
};/* HW DATA */
enum gdma_mr_type {
+ /*
+ * Guest Physical Address - MRs of this type allow access
+ * to any DMA-mapped memory using bus-logical address
+ */
+ GDMA_MR_TYPE_GPA = 1,
/* Guest Virtual Address - MRs of this type allow access
* to memory mapped by PTEs associated with this MR using a virtual
* address that is set up in the MST
*/
GDMA_MR_TYPE_GVA = 2,
+ /* Guest zero-based address MRs */
+ GDMA_MR_TYPE_ZBVA = 4,
};
struct gdma_create_mr_params {
@@ -816,6 +834,10 @@ struct gdma_create_mr_params {
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
+ struct {
+ u64 dma_region_handle;
+ enum gdma_mr_access_flags access_flags;
+ } zbva;
};
};
@@ -831,7 +853,10 @@ struct gdma_create_mr_request {
u64 virtual_address;
enum gdma_mr_access_flags access_flags;
} gva;
-
+ struct {
+ u64 dma_region_handle;
+ enum gdma_mr_access_flags access_flags;
+ } zbva;
};
u32 reserved_2;
};/* HW DATA */
@@ -883,4 +908,6 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
void mana_register_debugfs(void);
void mana_unregister_debugfs(void);
+int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
+
#endif /* _GDMA_H */
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 158b125692c2..83cf93338eb3 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -49,6 +49,15 @@ union hwc_init_type_data {
};
}; /* HW DATA */
+union hwc_init_soc_service_type {
+ u32 as_uint32;
+
+ struct {
+ u32 value : 28;
+ u32 type : 4;
+ };
+}; /* HW DATA */
+
struct hwc_rx_oob {
u32 type : 6;
u32 eom : 1;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0d00b24eacaf..9abb66461211 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -408,6 +408,7 @@ struct mana_context {
struct gdma_dev *gdma_dev;
u16 num_ports;
+ u8 bm_hostmode;
struct mana_eq *eqs;
struct dentry *mana_eqs_debugfs;
@@ -488,6 +489,9 @@ int mana_detach(struct net_device *ndev, bool from_close);
int mana_probe(struct gdma_dev *gd, bool resuming);
void mana_remove(struct gdma_dev *gd, bool suspending);
+int mana_rdma_probe(struct gdma_dev *gd);
+void mana_rdma_remove(struct gdma_dev *gd);
+
void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
u32 flags);
@@ -557,7 +561,8 @@ struct mana_query_device_cfg_resp {
u64 pf_cap_flags4;
u16 max_num_vports;
- u16 reserved;
+ u8 bm_hostmode; /* response v3: Bare Metal Host Mode */
+ u8 reserved;
u32 max_num_eqs;
/* response v2: */
@@ -827,5 +832,7 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
u32 doorbell_pg_id);
void mana_uncfg_vport(struct mana_port_context *apc);
-struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index);
+struct net_device *mana_get_primary_netdev(struct mana_context *ac,
+ u32 port_index,
+ netdevice_tracker *tracker);
#endif /* _MANA_H */
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 1ecbff7116f6..07d458990113 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -212,7 +212,7 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
BUILD_BUG_ON(sizeof(struct mctp_skb_cb) > sizeof(skb->cb));
WARN_ON(cb->magic != 0x4d435450);
- return (void *)(skb->cb);
+ return cb;
}
/* If CONFIG_MCTP_FLOWS, we may add one of these as a SKB extension,
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 814b5f2e3ed5..f7263fe2a2e4 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -14,6 +14,7 @@
struct mptcp_info;
struct mptcp_sock;
+struct mptcp_pm_addr_entry;
struct seq_file;
/* MPTCP sk_buff extension data */
@@ -100,17 +101,9 @@ struct mptcp_out_options {
#define MPTCP_SCHED_MAX 128
#define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX)
-#define MPTCP_SUBFLOWS_MAX 8
-
-struct mptcp_sched_data {
- bool reinject;
- u8 subflows;
- struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
-};
-
struct mptcp_sched_ops {
- int (*get_subflow)(struct mptcp_sock *msk,
- struct mptcp_sched_data *data);
+ int (*get_send)(struct mptcp_sock *msk);
+ int (*get_retrans)(struct mptcp_sock *msk);
char name[MPTCP_SCHED_NAME_MAX];
struct module *owner;
@@ -120,6 +113,19 @@ struct mptcp_sched_ops {
void (*release)(struct mptcp_sock *msk);
} ____cacheline_aligned_in_smp;
+#define MPTCP_PM_NAME_MAX 16
+#define MPTCP_PM_MAX 128
+#define MPTCP_PM_BUF_MAX (MPTCP_PM_NAME_MAX * MPTCP_PM_MAX)
+
+struct mptcp_pm_ops {
+ char name[MPTCP_PM_NAME_MAX];
+ struct module *owner;
+ struct list_head list;
+
+ void (*init)(struct mptcp_sock *msk);
+ void (*release)(struct mptcp_sock *msk);
+} ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_MPTCP
void mptcp_init(void);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 47181fd749b2..025a7574b275 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -83,6 +83,9 @@ struct net {
struct llist_node defer_free_list;
struct llist_node cleanup_list; /* namespaces on death row */
+ struct list_head ptype_all;
+ struct list_head ptype_specific;
+
#ifdef CONFIG_KEYS
struct key_tag *key_domain; /* Key domain of operation tag */
#endif
@@ -210,6 +213,8 @@ void net_ns_barrier(void);
struct ns_common *get_net_ns(struct ns_common *ns);
struct net *get_net_ns_by_fd(int fd);
+extern struct task_struct *cleanup_net_task;
+
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
@@ -470,8 +475,8 @@ struct pernet_operations {
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
/* Following method is called with RTNL held. */
- void (*exit_batch_rtnl)(struct list_head *net_exit_list,
- struct list_head *dev_kill_list);
+ void (*exit_rtnl)(struct net *net,
+ struct list_head *dev_kill_list);
unsigned int * const id;
const size_t size;
};
diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h
new file mode 100644
index 000000000000..3d3aef80beac
--- /dev/null
+++ b/include/net/netdev_lock.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _NET_NETDEV_LOCK_H
+#define _NET_NETDEV_LOCK_H
+
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+
+static inline bool netdev_trylock(struct net_device *dev)
+{
+ return mutex_trylock(&dev->lock);
+}
+
+static inline void netdev_assert_locked(const struct net_device *dev)
+{
+ lockdep_assert_held(&dev->lock);
+}
+
+static inline void
+netdev_assert_locked_or_invisible(const struct net_device *dev)
+{
+ if (dev->reg_state == NETREG_REGISTERED ||
+ dev->reg_state == NETREG_UNREGISTERING)
+ netdev_assert_locked(dev);
+}
+
+static inline bool netdev_need_ops_lock(const struct net_device *dev)
+{
+ bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops;
+
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+ ret |= !!dev->netdev_ops->net_shaper_ops;
+#endif
+
+ return ret;
+}
+
+static inline void netdev_lock_ops(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+}
+
+static inline void netdev_unlock_ops(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+}
+
+static inline void netdev_lock_ops_to_full(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_assert_locked(dev);
+ else
+ netdev_lock(dev);
+}
+
+static inline void netdev_unlock_full_to_ops(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_assert_locked(dev);
+ else
+ netdev_unlock(dev);
+}
+
+static inline void netdev_ops_assert_locked(const struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ lockdep_assert_held(&dev->lock);
+ else
+ ASSERT_RTNL();
+}
+
+static inline void
+netdev_ops_assert_locked_or_invisible(const struct net_device *dev)
+{
+ if (dev->reg_state == NETREG_REGISTERED ||
+ dev->reg_state == NETREG_UNREGISTERING)
+ netdev_ops_assert_locked(dev);
+}
+
+static inline void netdev_lock_ops_compat(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+ else
+ rtnl_lock();
+}
+
+static inline void netdev_unlock_ops_compat(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+ else
+ rtnl_unlock();
+}
+
+static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
+{
+ if (a == b)
+ return 0;
+
+ /* Allow locking multiple devices only under rtnl_lock,
+ * the exact order doesn't matter.
+ * Note that upper devices don't lock their ops, so nesting
+ * mostly happens in batched device removal for now.
+ */
+ return lockdep_rtnl_is_held() ? -1 : 1;
+}
+
+#define netdev_lockdep_set_classes(dev) \
+{ \
+ static struct lock_class_key qdisc_tx_busylock_key; \
+ static struct lock_class_key qdisc_xmit_lock_key; \
+ static struct lock_class_key dev_addr_list_lock_key; \
+ static struct lock_class_key dev_instance_lock_key; \
+ unsigned int i; \
+ \
+ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
+ lockdep_set_class(&(dev)->addr_list_lock, \
+ &dev_addr_list_lock_key); \
+ lockdep_set_class(&(dev)->lock, \
+ &dev_instance_lock_key); \
+ lock_set_cmp_fn(&dev->lock, netdev_lock_cmp_fn, NULL); \
+ for (i = 0; i < (dev)->num_tx_queues; i++) \
+ lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \
+ &qdisc_xmit_lock_key); \
+}
+
+#define netdev_lock_dereference(p, dev) \
+ rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock))
+
+int netdev_debug_event(struct notifier_block *nb, unsigned long event,
+ void *ptr);
+
+#endif
diff --git a/include/net/netdev_netlink.h b/include/net/netdev_netlink.h
new file mode 100644
index 000000000000..075962dbe743
--- /dev/null
+++ b/include/net/netdev_netlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_NETDEV_NETLINK_H
+#define __NET_NETDEV_NETLINK_H
+
+#include <linux/list.h>
+
+struct netdev_nl_sock {
+ struct mutex lock;
+ struct list_head bindings;
+};
+
+#endif /* __NET_NETDEV_NETLINK_H */
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 5ca019d294ca..ba2eaf39089b 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -4,6 +4,16 @@
#include <linux/netdevice.h>
+/**
+ * struct netdev_config - queue-related configuration for a netdev
+ * @hds_thresh: HDS Threshold value.
+ * @hds_config: HDS value from userspace.
+ */
+struct netdev_config {
+ u32 hds_thresh;
+ u8 hds_config;
+};
+
/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
u64 bytes;
@@ -13,6 +23,7 @@ struct netdev_queue_stats_rx {
u64 hw_drops;
u64 hw_drop_overruns;
+ u64 csum_complete;
u64 csum_unnecessary;
u64 csum_none;
u64 csum_bad;
@@ -74,9 +85,11 @@ struct netdev_queue_stats_tx {
* for some of the events is not maintained, and reliable "total" cannot
* be provided).
*
+ * Ops are called under the instance lock if netdev_need_ops_lock()
+ * returns true, otherwise under rtnl_lock.
* Device drivers can assume that when collecting total device stats,
* the @get_base_stats and subsequent per-queue calls are performed
- * "atomically" (without releasing the rtnl_lock).
+ * "atomically" (without releasing the relevant lock).
*
* Device drivers are encouraged to reset the per-queue statistics when
* number of queues change. This is because the primary use case for
@@ -92,6 +105,12 @@ struct netdev_stat_ops {
struct netdev_queue_stats_tx *tx);
};
+void netdev_stat_queue_sum(struct net_device *netdev,
+ int rx_start, int rx_end,
+ struct netdev_queue_stats_rx *rx_sum,
+ int tx_start, int tx_end,
+ struct netdev_queue_stats_tx *tx_sum);
+
/**
* struct netdev_queue_mgmt_ops - netdev ops for queue management
*
@@ -107,6 +126,10 @@ struct netdev_stat_ops {
*
* @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
* queue's memory is written at the specified address.
+ *
+ * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
+ * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
+ * be called for an interface which is open.
*/
struct netdev_queue_mgmt_ops {
size_t ndo_queue_mem_size;
@@ -265,27 +288,27 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue,
#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \
({ \
- struct netdev_queue *txq; \
+ struct netdev_queue *_txq; \
\
- txq = netdev_get_tx_queue(dev, idx); \
- netif_txq_try_stop(txq, get_desc, start_thrs); \
+ _txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_try_stop(_txq, get_desc, start_thrs); \
})
#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
({ \
- struct netdev_queue *txq; \
+ struct netdev_queue *_txq; \
\
- txq = netdev_get_tx_queue(dev, idx); \
- netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
+ _txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_maybe_stop(_txq, get_desc, stop_thrs, start_thrs); \
})
#define netif_subqueue_completed_wake(dev, idx, pkts, bytes, \
get_desc, start_thrs) \
({ \
- struct netdev_queue *txq; \
+ struct netdev_queue *_txq; \
\
- txq = netdev_get_tx_queue(dev, idx); \
- netif_txq_completed_wake(txq, pkts, bytes, \
+ _txq = netdev_get_tx_queue(dev, idx); \
+ netif_txq_completed_wake(_txq, pkts, bytes, \
get_desc, start_thrs); \
})
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index 596836abf7bf..8cdcd138b33f 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -16,15 +16,16 @@ struct netdev_rx_queue {
struct rps_dev_flow_table __rcu *rps_flow_table;
#endif
struct kobject kobj;
+ const struct attribute_group **groups;
struct net_device *dev;
netdevice_tracker dev_tracker;
+ /* All fields below are "ops protected",
+ * see comment about net_device::lock
+ */
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
- /* NAPI instance for the queue
- * Readers and writers must hold RTNL
- */
struct napi_struct *napi;
struct pp_memory_provider_params mp_params;
} ____cacheline_aligned_in_smp;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index cba3ccf03fcc..ca26274196b9 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -204,8 +204,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple);
void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- u32 extra_jiffies, bool do_acct);
+ u32 extra_jiffies, unsigned int bytes);
/* Refresh conntrack for this many jiffies and do accounting */
static inline void nf_ct_refresh_acct(struct nf_conn *ct,
@@ -213,15 +212,14 @@ static inline void nf_ct_refresh_acct(struct nf_conn *ct,
const struct sk_buff *skb,
u32 extra_jiffies)
{
- __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true);
+ __nf_ct_refresh_acct(ct, ctinfo, extra_jiffies, skb->len);
}
/* Refresh conntrack for this many jiffies */
static inline void nf_ct_refresh(struct nf_conn *ct,
- const struct sk_buff *skb,
u32 extra_jiffies)
{
- __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false);
+ __nf_ct_refresh_acct(ct, 0, extra_jiffies, 0);
}
/* kill conntrack and do accounting */
@@ -308,22 +306,23 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct)
/* use after obtaining a reference count */
static inline bool nf_ct_should_gc(const struct nf_conn *ct)
{
- return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
- !nf_ct_is_dying(ct);
+ if (!nf_ct_is_confirmed(ct))
+ return false;
+
+ /* load ct->timeout after is_confirmed() test.
+ * Pairs with __nf_conntrack_confirm() which:
+ * 1. Increases ct->timeout value
+ * 2. Inserts ct into rcu hlist
+ * 3. Sets the confirmed bit
+ * 4. Unlocks the hlist lock
+ */
+ smp_acquire__after_ctrl_dep();
+
+ return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct);
}
#define NF_CT_DAY (86400 * HZ)
-/* Set an arbitrary timeout large enough not to ever expire, this save
- * us a check for the IPS_OFFLOAD_BIT from the packet path via
- * nf_ct_is_expired().
- */
-static inline void nf_ct_offload_timeout(struct nf_conn *ct)
-{
- if (nf_ct_expires(ct) < NF_CT_DAY / 2)
- WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
-}
-
struct kernel_param;
int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 0c1dac318e02..8dcf7c371ee9 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -12,6 +12,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack_extend.h>
+#include <asm/local64.h>
enum nf_ct_ecache_state {
NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */
@@ -20,6 +21,9 @@ enum nf_ct_ecache_state {
struct nf_conntrack_ecache {
unsigned long cache; /* bitops want long */
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ local64_t timestamp; /* event timestamp, in nanoseconds */
+#endif
u16 ctmask; /* bitmask of ct events to be delivered */
u16 expmask; /* bitmask of expect events to be delivered */
u32 missed; /* missed events */
@@ -108,6 +112,14 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
if (e == NULL)
return;
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ /* renew only if this is the first cached event, so that the
+ * timestamp reflects the first, not the last, generated event.
+ */
+ if (local64_read(&e->timestamp) && READ_ONCE(e->cache) == 0)
+ local64_set(&e->timestamp, ktime_get_real_ns());
+#endif
+
set_bit(event, &e->cache);
#endif
}
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index b63d53bb9dd6..c003cd194fa2 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -163,6 +163,7 @@ struct flow_offload_tuple_rhash {
enum nf_flow_flags {
NF_FLOW_SNAT,
NF_FLOW_DNAT,
+ NF_FLOW_CLOSING,
NF_FLOW_TEARDOWN,
NF_FLOW_HW,
NF_FLOW_HW_DYING,
@@ -369,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
{
- if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN))
return false;
*inner_proto = __nf_flow_pppoe_proto(skb);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f6958118986a..5e49619ae49c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1199,10 +1199,17 @@ struct nft_stats {
struct nft_hook {
struct list_head list;
- struct nf_hook_ops ops;
+ struct list_head ops_list;
struct rcu_head rcu;
+ char ifname[IFNAMSIZ];
+ u8 ifnamelen;
};
+struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook,
+ const struct net_device *dev);
+struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook,
+ const struct net_device *dev);
+
/**
* struct nft_base_chain - nf_tables base chain
*
@@ -1236,8 +1243,6 @@ static inline bool nft_is_base_chain(const struct nft_chain *chain)
return chain->flags & NFT_CHAIN_BASE;
}
-int __nft_release_basechain(struct nft_ctx *ctx);
-
unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
static inline bool nft_use_inc(u32 *use)
@@ -1891,7 +1896,7 @@ void nft_chain_filter_fini(void);
void __init nft_chain_route_init(void);
void nft_chain_route_fini(void);
-void nf_tables_trans_destroy_flush_work(void);
+void nf_tables_trans_destroy_flush_work(struct net *net);
int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
__be64 nf_jiffies64_to_msecs(u64 input);
@@ -1905,6 +1910,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
struct nftables_pernet {
struct list_head tables;
struct list_head commit_list;
+ struct list_head destroy_list;
struct list_head commit_set_list;
struct list_head binding_list;
struct list_head module_list;
@@ -1915,6 +1921,7 @@ struct nftables_pernet {
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
+ struct work_struct destroy_work;
};
extern unsigned int nf_tables_net_id;
diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 5adf6fda11e8..06985530517b 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -49,7 +49,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
*
* nf_tproxy_handle_time_wait4() consumes the socket reference passed in.
*
- * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * Returns: the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
@@ -108,7 +108,7 @@ nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
*
* nf_tproxy_handle_time_wait6() consumes the socket reference passed in.
*
- * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * Returns: the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 38cae7113de4..7370fba844ef 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -2,6 +2,7 @@
#ifndef _NFT_FIB_H_
#define _NFT_FIB_H_
+#include <net/l3mdev.h>
#include <net/netfilter/nf_tables.h>
struct nft_fib {
@@ -18,6 +19,35 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in)
return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
+static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sock *sk;
+
+ switch (nft_hook(pkt)) {
+ case NF_INET_PRE_ROUTING:
+ case NF_INET_INGRESS:
+ case NF_INET_LOCAL_IN:
+ break;
+ default:
+ return false;
+ }
+
+ sk = pkt->skb->sk;
+ if (sk && sk_fullsock(sk))
+ return sk->sk_rx_dst_ifindex == indev->ifindex;
+
+ return nft_fib_is_loopback(pkt->skb, indev);
+}
+
+static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt,
+ const struct net_device *iif)
+{
+ const struct net_device *dev = iif ? iif : pkt->skb->dev;
+
+ return l3mdev_master_ifindex_rcu(dev);
+}
+
int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset);
int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[]);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 39eaa6be6ca8..90a560dc167a 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -118,6 +118,7 @@
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
+ * nla_put_empty_nest(skb, type) create an empty nest
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
@@ -320,7 +321,13 @@ enum nla_policy_validation {
* All other Unused - but note that it's a union
*
* Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
+ * NLA_U8, NLA_U16,
+ * NLA_U32, NLA_U64,
+ * NLA_S8, NLA_S16,
+ * NLA_S32, NLA_S64,
+ * NLA_MSECS,
* NLA_BINARY Validation function called for the attribute.
+ *
* All other Unused - but note that it's a union
*
* Example:
@@ -611,6 +618,22 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh)
}
/**
+ * nlmsg_payload - message payload if the data fits in the len
+ * @nlh: netlink message header
+ * @len: struct length
+ *
+ * Returns: The netlink message payload/data if the length is sufficient,
+ * otherwise NULL.
+ */
+static inline void *nlmsg_payload(const struct nlmsghdr *nlh, size_t len)
+{
+ if (nlh->nlmsg_len < nlmsg_msg_size(len))
+ return NULL;
+
+ return nlmsg_data(nlh);
+}
+
+/**
* nlmsg_attrdata - head of attributes data
* @nlh: netlink message header
* @hdrlen: length of family specific header
@@ -649,7 +672,7 @@ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining)
* @nlh: netlink message header
* @remaining: number of bytes remaining in message stream
*
- * Returns the next netlink message in the message stream and
+ * Returns: the next netlink message in the message stream and
* decrements remaining by the size of the current message.
*/
static inline struct nlmsghdr *
@@ -676,7 +699,7 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining)
* exceeding maxtype will be rejected, policy must be specified, attributes
* will be validated in the strictest way possible.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse(struct nlattr **tb, int maxtype,
const struct nlattr *head, int len,
@@ -701,7 +724,7 @@ static inline int nla_parse(struct nlattr **tb, int maxtype,
* exceeding maxtype will be ignored and attributes from the policy are not
* always strictly validated (only for new attributes).
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
const struct nlattr *head, int len,
@@ -726,7 +749,7 @@ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
* exceeding maxtype will be rejected as well as trailing data, but the
* policy is not completely strictly validated (only for new attributes).
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype,
const struct nlattr *head,
@@ -833,7 +856,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @attrtype: type of attribute to look for
*
- * Returns the first attribute which matches the specified type.
+ * Returns: the first attribute which matches the specified type.
*/
static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
int hdrlen, int attrtype)
@@ -854,7 +877,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
* specified policy. Validation is done in liberal mode.
* See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_validate_deprecated(const struct nlattr *head, int len,
int maxtype,
@@ -877,7 +900,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len,
* specified policy. Validation is done in strict mode.
* See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_validate(const struct nlattr *head, int len, int maxtype,
const struct nla_policy *policy,
@@ -914,7 +937,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
* nlmsg_report - need to report back to application?
* @nlh: netlink message header
*
- * Returns 1 if a report back to the application is requested.
+ * Returns: 1 if a report back to the application is requested.
*/
static inline int nlmsg_report(const struct nlmsghdr *nlh)
{
@@ -925,7 +948,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh)
* nlmsg_seq - return the seq number of netlink message
* @nlh: netlink message header
*
- * Returns 0 if netlink message is NULL
+ * Returns: 0 if netlink message is NULL
*/
static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
{
@@ -952,7 +975,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
* @payload: length of message payload
* @flags: message flags
*
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
@@ -971,7 +994,7 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se
*
* Append data to an existing nlmsg, used when constructing a message
* with multiple fixed-format headers (which is rare).
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the extra payload.
*/
static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
@@ -993,7 +1016,7 @@ static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
* @payload: length of message payload
* @flags: message flags
*
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
@@ -1050,7 +1073,7 @@ static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
* nlmsg_get_pos - return current position in netlink message
* @skb: socket buffer the message is stored in
*
- * Returns a pointer to the current tail of the message.
+ * Returns: a pointer to the current tail of the message.
*/
static inline void *nlmsg_get_pos(struct sk_buff *skb)
{
@@ -1276,7 +1299,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining)
* @nla: netlink attribute
* @remaining: number of bytes remaining in attribute stream
*
- * Returns the next netlink attribute in the attribute stream and
+ * Returns: the next netlink attribute in the attribute stream and
* decrements remaining by the size of the current attribute.
*/
static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
@@ -1292,7 +1315,7 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
* @nla: attribute containing the nested attributes
* @attrtype: type of attribute to look for
*
- * Returns the first attribute which matches the specified type.
+ * Returns: the first attribute which matches the specified type.
*/
static inline struct nlattr *
nla_find_nested(const struct nlattr *nla, int attrtype)
@@ -2091,7 +2114,7 @@ static inline int nla_get_flag(const struct nlattr *nla)
* nla_get_msecs - return payload of msecs attribute
* @nla: msecs netlink attribute
*
- * Returns the number of milliseconds in jiffies.
+ * Returns: the number of milliseconds in jiffies.
*/
static inline unsigned long nla_get_msecs(const struct nlattr *nla)
{
@@ -2183,7 +2206,7 @@ static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp)
* marked their nest attributes with NLA_F_NESTED flag. New APIs should use
* nla_nest_start() which sets the flag.
*
- * Returns the container attribute or NULL on error
+ * Returns: the container attribute or NULL on error
*/
static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb,
int attrtype)
@@ -2204,7 +2227,7 @@ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb,
* Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED
* flag. This is the preferred function to use in new code.
*
- * Returns the container attribute or NULL on error
+ * Returns: the container attribute or NULL on error
*/
static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
{
@@ -2219,7 +2242,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
* Corrects the container attribute header to include the all
* appended attributes.
*
- * Returns the total data length of the skb.
+ * Returns: the total data length of the skb.
*/
static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
{
@@ -2241,6 +2264,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
}
/**
+ * nla_put_empty_nest - Create an empty nest
+ * @skb: socket buffer the message is stored in
+ * @attrtype: attribute type of the container
+ *
+ * This function is a helper for creating empty nests.
+ *
+ * Returns: 0 when successful or -EMSGSIZE on failure.
+ */
+static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype)
+{
+ return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE;
+}
+
+/**
* __nla_validate_nested - Validate a stream of nested attributes
* @start: container attribute
* @maxtype: maximum attribute type to be expected
@@ -2252,7 +2289,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
* specified policy. Attributes with a type exceeding maxtype will be
* ignored. See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int __nla_validate_nested(const struct nlattr *start, int maxtype,
const struct nla_policy *policy,
@@ -2285,7 +2322,7 @@ nla_validate_nested_deprecated(const struct nlattr *start, int maxtype,
* nla_need_padding_for_64bit - test 64-bit alignment of the next attribute
* @skb: socket buffer the message is stored in
*
- * Return true if padding is needed to align the next attribute (nla_data()) to
+ * Return: true if padding is needed to align the next attribute (nla_data()) to
* a 64-bit aligned area.
*/
static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
@@ -2312,7 +2349,7 @@ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
* This will only be done in architectures which do not have
* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined.
*
- * Returns zero on success or a negative error code.
+ * Returns: zero on success or a negative error code.
*/
static inline int nla_align_64bit(struct sk_buff *skb, int padattr)
{
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 8a6e20be4b9d..386164fb9c18 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -8,6 +8,7 @@
#ifndef _NET_NETMEM_H
#define _NET_NETMEM_H
+#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <net/net_debug.h>
@@ -20,15 +21,33 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
*/
#define NET_IOV 0x01UL
+enum net_iov_type {
+ NET_IOV_DMABUF,
+ NET_IOV_IOURING,
+
+ /* Force size to unsigned long to make the NET_IOV_ASSERTS below pass.
+ */
+ NET_IOV_MAX = ULONG_MAX
+};
+
struct net_iov {
- unsigned long __unused_padding;
+ enum net_iov_type type;
unsigned long pp_magic;
struct page_pool *pp;
- struct dmabuf_genpool_chunk_owner *owner;
+ struct net_iov_area *owner;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
+struct net_iov_area {
+ /* Array of net_iovs for this area. */
+ struct net_iov *niovs;
+ size_t num_niovs;
+
+ /* Offset into the dma-buf where this chunk starts. */
+ unsigned long base_virtual;
+};
+
/* These fields in struct page are used by the page_pool and net stack:
*
* struct {
@@ -54,6 +73,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
#undef NET_IOV_ASSERT_OFFSET
+static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov)
+{
+ return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+ return niov - net_iov_owner(niov)->niovs;
+}
+
/* netmem */
/**
@@ -72,6 +101,22 @@ static inline bool netmem_is_net_iov(const netmem_ref netmem)
return (__force unsigned long)netmem & NET_IOV;
}
+/**
+ * __netmem_to_page - unsafely get pointer to the &page backing @netmem
+ * @netmem: netmem reference to convert
+ *
+ * Unsafe version of netmem_to_page(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (no check for the LSB, no WARN). When @netmem points to IOV,
+ * provokes undefined behaviour.
+ *
+ * Return: pointer to the &page (garbage if @netmem is not page-backed).
+ */
+static inline struct page *__netmem_to_page(netmem_ref netmem)
+{
+ return (__force struct page *)netmem;
+}
+
/* This conversion fails (returns NULL) if the netmem_ref is not struct page
* backed.
*/
@@ -80,7 +125,7 @@ static inline struct page *netmem_to_page(netmem_ref netmem)
if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
return NULL;
- return (__force struct page *)netmem;
+ return __netmem_to_page(netmem);
}
static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem)
@@ -103,6 +148,17 @@ static inline netmem_ref page_to_netmem(struct page *page)
return (__force netmem_ref)page;
}
+/**
+ * virt_to_netmem - convert virtual memory pointer to a netmem reference
+ * @data: host memory pointer to convert
+ *
+ * Return: netmem reference to the &page backing this virtual address.
+ */
+static inline netmem_ref virt_to_netmem(const void *data)
+{
+ return page_to_netmem(virt_to_page(data));
+}
+
static inline int netmem_ref_count(netmem_ref netmem)
{
/* The non-pp refcount of net_iov is always 1. On net_iov, we only
@@ -127,6 +183,22 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
}
+/**
+ * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
+ * @netmem: netmem reference to get the pointer from
+ *
+ * Unsafe version of netmem_get_pp(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (avoids clearing the LSB). When @netmem points to IOV,
+ * provokes invalid memory access.
+ *
+ * Return: pointer to the &page_pool (garbage if @netmem is not page-backed).
+ */
+static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
+{
+ return __netmem_to_page(netmem)->pp;
+}
+
static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
{
return __netmem_clear_lsb(netmem)->pp;
@@ -158,12 +230,43 @@ static inline netmem_ref netmem_compound_head(netmem_ref netmem)
return page_to_netmem(compound_head(netmem_to_page(netmem)));
}
+/**
+ * __netmem_address - unsafely get pointer to the memory backing @netmem
+ * @netmem: netmem reference to get the pointer for
+ *
+ * Unsafe version of netmem_address(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (no check for the LSB). When @netmem points to IOV, provokes
+ * undefined behaviour.
+ *
+ * Return: pointer to the memory (garbage if @netmem is not page-backed).
+ */
+static inline void *__netmem_address(netmem_ref netmem)
+{
+ return page_address(__netmem_to_page(netmem));
+}
+
static inline void *netmem_address(netmem_ref netmem)
{
if (netmem_is_net_iov(netmem))
return NULL;
- return page_address(netmem_to_page(netmem));
+ return __netmem_address(netmem);
+}
+
+/**
+ * netmem_is_pfmemalloc - check if @netmem was allocated under memory pressure
+ * @netmem: netmem reference to check
+ *
+ * Return: true if @netmem is page-backed and the page was allocated under
+ * memory pressure, false otherwise.
+ */
+static inline bool netmem_is_pfmemalloc(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return false;
+
+ return page_is_pfmemalloc(netmem_to_page(netmem));
}
static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
@@ -171,4 +274,26 @@ static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
return __netmem_clear_lsb(netmem)->dma_addr;
}
+void get_netmem(netmem_ref netmem);
+void put_netmem(netmem_ref netmem);
+
+#define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \
+ do { \
+ if (!netmem_is_net_iov(NETMEM)) \
+ dma_unmap_addr_set(PTR, ADDR_NAME, VAL); \
+ else \
+ dma_unmap_addr_set(PTR, ADDR_NAME, 0); \
+ } while (0)
+
+static inline void netmem_dma_unmap_page_attrs(struct device *dev,
+ dma_addr_t addr, size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (!addr)
+ return;
+
+ dma_unmap_page_attrs(dev, addr, size, dir, attrs);
+}
+
#endif /* _NET_NETMEM_H */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 3c014170e001..6373e3f17da8 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -47,6 +47,11 @@ struct sysctl_fib_multipath_hash_seed {
};
#endif
+struct udp_tunnel_gro {
+ struct sock __rcu *sk;
+ struct hlist_head list;
+};
+
struct netns_ipv4 {
/* Cacheline organization can be found documented in
* Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
@@ -85,6 +90,11 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+ /* Not in a pernet subsys because need to be available at GRO stage */
+ struct udp_tunnel_gro udp_tunnel_gro[2];
+#endif
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
struct ctl_table_header *frags_hdr;
@@ -111,6 +121,9 @@ struct netns_ipv4 {
#endif
struct hlist_head *fib_table_hash;
struct sock *fibnl;
+ struct hlist_head *fib_info_hash;
+ unsigned int fib_info_hash_bits;
+ unsigned int fib_info_cnt;
struct sock *mc_autojoin_sk;
@@ -175,11 +188,13 @@ struct netns_ipv4 {
u8 sysctl_tcp_retries2;
u8 sysctl_tcp_orphan_retries;
u8 sysctl_tcp_tw_reuse;
+ unsigned int sysctl_tcp_tw_reuse_delay;
int sysctl_tcp_fin_timeout;
u8 sysctl_tcp_sack;
u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps;
int sysctl_tcp_rto_min_us;
+ int sysctl_tcp_rto_max_ms;
u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle;
@@ -272,4 +287,5 @@ struct netns_ipv4 {
struct hlist_head *inet_addr_lst;
struct delayed_work addr_chk_work;
};
+
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5f2cfd84570a..47dc70d8100a 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -72,6 +72,7 @@ struct netns_ipv6 {
struct rt6_statistics *rt6_stats;
struct timer_list ip6_fib_timer;
struct hlist_head *fib_table_hash;
+ spinlock_t fib_table_hash_lock;
struct fib6_table *fib6_main_tbl;
struct list_head fib6_walkers;
rwlock_t fib6_walker_lock;
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index d9fb44e8b321..572e69cda476 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -152,6 +152,8 @@ struct nexthop {
u8 protocol; /* app managing this nh */
u8 nh_flags;
bool is_group;
+ bool dead;
+ spinlock_t lock; /* protect dead and f6i_list */
refcount_t refcnt;
struct rcu_head rcu;
diff --git a/include/net/p8022.h b/include/net/p8022.h
deleted file mode 100644
index a29e224ac498..000000000000
--- a/include/net/p8022.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_P8022_H
-#define _NET_P8022_H
-
-struct net_device;
-struct packet_type;
-struct sk_buff;
-
-struct datalink_proto *
-register_8022_client(unsigned char type,
- int (*func)(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev));
-void unregister_8022_client(struct datalink_proto *proto);
-#endif
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 60a5347922be..93f2c31baf9b 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -104,8 +104,7 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
*
* Get a page fragment from the page allocator or page_pool caches.
*
- * Return:
- * Return allocated page fragment, otherwise return NULL.
+ * Return: allocated page fragment, otherwise return NULL.
*/
static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
unsigned int *offset,
@@ -116,22 +115,22 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
return page_pool_alloc_frag(pool, offset, size, gfp);
}
-static inline struct page *page_pool_alloc(struct page_pool *pool,
- unsigned int *offset,
- unsigned int *size, gfp_t gfp)
+static inline netmem_ref page_pool_alloc_netmem(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size, gfp_t gfp)
{
unsigned int max_size = PAGE_SIZE << pool->p.order;
- struct page *page;
+ netmem_ref netmem;
if ((*size << 1) > max_size) {
*size = max_size;
*offset = 0;
- return page_pool_alloc_pages(pool, gfp);
+ return page_pool_alloc_netmems(pool, gfp);
}
- page = page_pool_alloc_frag(pool, offset, *size, gfp);
- if (unlikely(!page))
- return NULL;
+ netmem = page_pool_alloc_frag_netmem(pool, offset, *size, gfp);
+ if (unlikely(!netmem))
+ return 0;
/* There is very likely not enough space for another fragment, so append
* the remaining size to the current fragment to avoid truesize
@@ -142,7 +141,23 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
pool->frag_offset = max_size;
}
- return page;
+ return netmem;
+}
+
+static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size)
+{
+ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+
+ return page_pool_alloc_netmem(pool, offset, size, gfp);
+}
+
+static inline struct page *page_pool_alloc(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size, gfp_t gfp)
+{
+ return netmem_to_page(page_pool_alloc_netmem(pool, offset, size, gfp));
}
/**
@@ -155,8 +170,7 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
* depending on the requested size in order to allocate memory with least memory
* utilization and performance penalty.
*
- * Return:
- * Return allocated page or page fragment, otherwise return NULL.
+ * Return: allocated page or page fragment, otherwise return NULL.
*/
static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
unsigned int *offset,
@@ -190,8 +204,7 @@ static inline void *page_pool_alloc_va(struct page_pool *pool,
* This is just a thin wrapper around the page_pool_alloc() API, and
* it returns va of the allocated page or page fragment.
*
- * Return:
- * Return the va for the allocated page or page fragment, otherwise return NULL.
+ * Return: the va for the allocated page or page fragment, otherwise return NULL.
*/
static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
unsigned int *size)
@@ -302,7 +315,7 @@ static inline void page_pool_ref_page(struct page *page)
page_pool_ref_netmem(page_to_netmem(page));
}
-static inline bool page_pool_is_last_ref(netmem_ref netmem)
+static inline bool page_pool_unref_and_test(netmem_ref netmem)
{
/* If page_pool_unref_page() returns 0, we were the last user */
return page_pool_unref_netmem(netmem, 1) == 0;
@@ -317,7 +330,7 @@ static inline void page_pool_put_netmem(struct page_pool *pool,
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_ref(netmem))
+ if (!page_pool_unref_and_test(netmem))
return;
page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct);
@@ -382,6 +395,12 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
page_pool_put_full_page(pool, page, true);
}
+static inline void page_pool_recycle_direct_netmem(struct page_pool *pool,
+ netmem_ref netmem)
+{
+ page_pool_put_full_netmem(pool, netmem, true);
+}
+
#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \
(sizeof(dma_addr_t) > sizeof(unsigned long))
@@ -418,7 +437,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
*/
static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
{
- return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+ ret <<= PAGE_SHIFT;
+
+ return ret;
+}
+
+static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool,
+ const dma_addr_t dma_addr,
+ u32 offset, u32 dma_sync_size)
+{
+ dma_sync_single_range_for_cpu(pool->p.dev, dma_addr,
+ offset + pool->p.offset, dma_sync_size,
+ page_pool_get_dma_dir(pool));
}
/**
@@ -437,10 +470,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
const struct page *page,
u32 offset, u32 dma_sync_size)
{
- dma_sync_single_range_for_cpu(pool->p.dev,
- page_pool_get_dma_addr(page),
- offset + pool->p.offset, dma_sync_size,
- page_pool_get_dma_dir(pool));
+ __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset,
+ dma_sync_size);
+}
+
+static inline void
+page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool,
+ const netmem_ref netmem, u32 offset,
+ u32 dma_sync_size)
+{
+ if (!pool->dma_sync_for_cpu)
+ return;
+
+ __page_pool_dma_sync_for_cpu(pool,
+ page_pool_get_dma_addr_netmem(netmem),
+ offset, dma_sync_size);
}
static inline bool page_pool_put(struct page_pool *pool)
@@ -454,4 +498,9 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
+static inline bool page_pool_is_unreadable(struct page_pool *pool)
+{
+ return !!pool->mp_ops;
+}
+
#endif /* _NET_PAGE_POOL_HELPERS_H */
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
new file mode 100644
index 000000000000..ada4f968960a
--- /dev/null
+++ b/include/net/page_pool/memory_provider.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_PAGE_POOL_MEMORY_PROVIDER_H
+#define _NET_PAGE_POOL_MEMORY_PROVIDER_H
+
+#include <net/netmem.h>
+#include <net/page_pool/types.h>
+
+struct netdev_rx_queue;
+struct netlink_ext_ack;
+struct sk_buff;
+
+struct memory_provider_ops {
+ netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp);
+ bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem);
+ int (*init)(struct page_pool *pool);
+ void (*destroy)(struct page_pool *pool);
+ int (*nl_fill)(void *mp_priv, struct sk_buff *rsp,
+ struct netdev_rx_queue *rxq);
+ void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq);
+};
+
+bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
+void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
+void net_mp_niov_clear_page_pool(struct net_iov *niov);
+
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p);
+int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *p,
+ struct netlink_ext_ack *extack);
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p);
+void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *old_p);
+
+/**
+ * net_mp_netmem_place_in_cache() - give a netmem to a page pool
+ * @pool: the page pool to place the netmem into
+ * @netmem: netmem to give
+ *
+ * Push an accounted netmem into the page pool's allocation cache. The caller
+ * must ensure that there is space in the cache. It should only be called off
+ * the mp_ops->alloc_netmems() path.
+ */
+static inline void net_mp_netmem_place_in_cache(struct page_pool *pool,
+ netmem_ref netmem)
+{
+ pool->alloc.cache[pool->alloc.count++] = netmem;
+}
+
+#endif
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 386efddd2aac..1509a536cb85 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -6,6 +6,7 @@
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
+#include <linux/xarray.h>
#include <net/netmem.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
@@ -33,6 +34,9 @@
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)
+/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
+#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)
+
/*
* Fast allocation side cache array/stack
*
@@ -152,8 +156,11 @@ struct page_pool_stats {
*/
#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
+struct memory_provider_ops;
+
struct pp_memory_provider_params {
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
};
struct page_pool {
@@ -164,7 +171,8 @@ struct page_pool {
bool has_init_callback:1; /* slow::init_callback is set */
bool dma_map:1; /* Perform DMA mapping */
- bool dma_sync:1; /* Perform DMA sync */
+ bool dma_sync:1; /* Perform DMA sync for device */
+ bool dma_sync_for_cpu:1; /* Perform DMA sync for cpu */
#ifdef CONFIG_PAGE_POOL_STATS
bool system:1; /* This is a global percpu pool */
#endif
@@ -215,6 +223,9 @@ struct page_pool {
struct ptr_ring ring;
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
+
+ struct xarray dma_mapped;
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
@@ -241,7 +252,7 @@ struct page_pool {
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
-netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp);
+netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
@@ -254,12 +265,13 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
+void page_pool_enable_direct_recycling(struct page_pool *pool,
+ struct napi_struct *napi);
void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
const struct xdp_mem_info *mem);
-void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count);
+void page_pool_put_netmem_bulk(netmem_ref *data, u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
@@ -271,8 +283,7 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool,
{
}
-static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count)
+static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
}
#endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4eb0ebb9e76c..c64fd896b1f9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -319,7 +319,7 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts,
* tcf_exts_has_actions - check if at least one action is present
* @exts: tc filter extensions handle
*
- * Returns true if at least one action is present.
+ * Returns: true if at least one action is present.
*/
static inline bool tcf_exts_has_actions(struct tcf_exts *exts)
{
@@ -501,7 +501,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
* through all ematches respecting their logic relations returning
* as soon as the result is obvious.
*
- * Returns 1 if the ematch tree as-one matches, no ematches are configured
+ * Returns: 1 if the ematch tree as-one matches, no ematches are configured
* or ematch is not enabled in the kernel, otherwise 0 is returned.
*/
static inline int tcf_em_tree_match(struct sk_buff *skb,
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index d7b7b6cd4aa1..8a75c73fc555 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
struct netlink_ext_ack *extack);
void qdisc_put_rtab(struct qdisc_rate_table *tab);
void qdisc_put_stab(struct qdisc_size_table *tab);
-void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev, struct netdev_queue *txq,
spinlock_t *root_lock, bool validate);
@@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
return true;
}
+static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
+{
+ if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
+ pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+ txt, qdisc->ops->id, qdisc->handle >> 16);
+ qdisc->flags |= TCQ_F_WARN_NONWC;
+ }
+}
+
+static inline unsigned int qdisc_peek_len(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+ unsigned int len;
+
+ skb = sch->ops->peek(sch);
+ if (unlikely(skb == NULL)) {
+ qdisc_warn_nonwc("qdisc_peek_len", sch);
+ return 0;
+ }
+ len = qdisc_pkt_len(skb);
+
+ return len;
+}
+
#endif
diff --git a/include/net/route.h b/include/net/route.h
index 64949854d35d..3d3d6048ffca 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -28,6 +28,7 @@
#include <net/arp.h>
#include <net/ndisc.h>
#include <net/inet_dscp.h>
+#include <net/sock.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -129,6 +130,33 @@ struct in_device;
int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);
+
+static inline void inet_sk_init_flowi4(const struct inet_sock *inet,
+ struct flowi4 *fl4)
+{
+ const struct ip_options_rcu *ip4_opt;
+ const struct sock *sk;
+ __be32 daddr;
+
+ rcu_read_lock();
+ ip4_opt = rcu_dereference(inet->inet_opt);
+
+ /* Source routing option overrides the socket destination address */
+ if (ip4_opt && ip4_opt->opt.srr)
+ daddr = ip4_opt->opt.faddr;
+ else
+ daddr = inet->inet_daddr;
+ rcu_read_unlock();
+
+ sk = &inet->sk;
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
+ ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
+ sk->sk_protocol, inet_sk_flowi_flags(sk), daddr,
+ inet->inet_saddr, inet->inet_dport,
+ inet->inet_sport, sk_uid(sk));
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
+}
+
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
const struct sk_buff *skb);
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp,
@@ -185,20 +213,6 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
return ip_route_output_flow(net, fl4, sk);
}
-static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 *fl4,
- __be32 daddr, __be32 saddr,
- __be32 gre_key, __u8 tos, int oif)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = IPPROTO_GRE;
- fl4->fl4_gre_key = gre_key;
- return ip_route_output_key(net, fl4);
-}
-
enum skb_drop_reason
ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
@@ -312,9 +326,12 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
if (inet_test_bit(TRANSPARENT, sk))
flow_flags |= FLOWI_FLAG_ANYSRC;
+ if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !sport)
+ flow_flags |= FLOWI_FLAG_ANY_SPORT;
+
flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), protocol, flow_flags, dst,
- src, dport, sport, sk->sk_uid);
+ src, dport, sport, sk_uid(sk));
}
static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
diff --git a/include/net/rps.h b/include/net/rps.h
index a93401d23d66..d8ab3a08bcc4 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -39,7 +39,7 @@ struct rps_dev_flow {
* The rps_dev_flow_table structure contains a table of flow mappings.
*/
struct rps_dev_flow_table {
- unsigned int mask;
+ u8 log;
struct rcu_head rcu;
struct rps_dev_flow flows[];
};
@@ -57,9 +57,10 @@ struct rps_dev_flow_table {
* meaning we use 32-6=26 bits for the hash.
*/
struct rps_sock_flow_table {
- u32 mask;
+ struct rcu_head rcu;
+ u32 mask;
- u32 ents[] ____cacheline_aligned_in_smp;
+ u32 ents[] ____cacheline_aligned_in_smp;
};
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
@@ -122,6 +123,30 @@ static inline void sock_rps_record_flow(const struct sock *sk)
#endif
}
+static inline void sock_rps_delete_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *table;
+ u32 hash, index;
+
+ if (!static_branch_unlikely(&rfs_needed))
+ return;
+
+ hash = READ_ONCE(sk->sk_rxhash);
+ if (!hash)
+ return;
+
+ rcu_read_lock();
+ table = rcu_dereference(net_hotdata.rps_sock_flow_table);
+ if (table) {
+ index = hash & table->mask;
+ if (READ_ONCE(table->ents[index]) != RPS_NO_CPU)
+ WRITE_ONCE(table->ents[index], RPS_NO_CPU);
+ }
+ rcu_read_unlock();
+#endif
+}
+
static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
diff --git a/include/net/rstreason.h b/include/net/rstreason.h
index 69cb2e52b7da..979ac87b5d99 100644
--- a/include/net/rstreason.h
+++ b/include/net/rstreason.h
@@ -36,7 +36,7 @@
/**
* enum sk_rst_reason - the reasons of socket reset
*
- * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols.
+ * The reasons of sk reset, which are used in TCP/MPTCP protocols.
*
* There are three parts in order:
* 1) skb drop reasons: relying on drop reasons for such as passive reset
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index bc0069a8b6ea..ec65a8cebb99 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -70,6 +70,40 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
}
/**
+ * struct rtnl_newlink_params - parameters of rtnl_link_ops::newlink()
+ *
+ * @src_net: Source netns of rtnetlink socket
+ * @link_net: Link netns by IFLA_LINK_NETNSID, NULL if not specified
+ * @peer_net: Peer netns
+ * @tb: IFLA_* attributes
+ * @data: IFLA_INFO_DATA attributes
+ */
+struct rtnl_newlink_params {
+ struct net *src_net;
+ struct net *link_net;
+ struct net *peer_net;
+ struct nlattr **tb;
+ struct nlattr **data;
+};
+
+/* Get effective link netns from newlink params. Generally, this is link_net
+ * and falls back to src_net. But for compatibility, a driver may * choose to
+ * use dev_net(dev) instead.
+ */
+static inline struct net *rtnl_newlink_link_net(struct rtnl_newlink_params *p)
+{
+ return p->link_net ? : p->src_net;
+}
+
+/* Get peer netns from newlink params. Fallback to link netns if peer netns is
+ * not specified explicitly.
+ */
+static inline struct net *rtnl_newlink_peer_net(struct rtnl_newlink_params *p)
+{
+ return p->peer_net ? : rtnl_newlink_link_net(p);
+}
+
+/**
* struct rtnl_link_ops - rtnetlink link operations
*
* @list: Used internally, protected by link_ops_mutex and SRCU
@@ -125,10 +159,8 @@ struct rtnl_link_ops {
struct nlattr *data[],
struct netlink_ext_ack *extack);
- int (*newlink)(struct net *src_net,
- struct net_device *dev,
- struct nlattr *tb[],
- struct nlattr *data[],
+ int (*newlink)(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack);
int (*changelink)(struct net_device *dev,
struct nlattr *tb[],
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 24e48af7e8f7..638948be4c50 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -803,6 +803,14 @@ static inline bool qdisc_tx_changing(const struct net_device *dev)
return false;
}
+/* "noqueue" qdisc identified by not having any enqueue, see noqueue_init() */
+static inline bool qdisc_txq_has_no_queue(const struct netdev_queue *txq)
+{
+ struct Qdisc *qdisc = rcu_access_pointer(txq->qdisc);
+
+ return qdisc->enqueue == NULL;
+}
+
/* Is the device using the noop qdisc on all queues? */
static inline bool qdisc_tx_is_noop(const struct net_device *dev)
{
@@ -965,14 +973,6 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
*backlog = qstats.backlog;
}
-static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
-{
- __u32 qlen, backlog;
-
- qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
- qdisc_tree_reduce_backlog(sch, qlen, backlog);
-}
-
static inline void qdisc_purge_queue(struct Qdisc *sch)
{
__u32 qlen, backlog;
@@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
return skb;
}
+static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue(&sch->gso_skb);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+ if (direct)
+ return __qdisc_dequeue_head(&sch->q);
+ else
+ return sch->dequeue(sch);
+}
+
static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
@@ -1244,6 +1259,14 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_DROP;
}
+static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free,
+ enum skb_drop_reason reason)
+{
+ tcf_set_drop_reason(skb, reason);
+ return qdisc_drop(skb, sch, to_free);
+}
+
static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
diff --git a/include/net/scm.h b/include/net/scm.h
index 0d35c7c77a74..84c4707e78a5 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -102,123 +102,10 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
return __scm_send(sock, msg, scm);
}
-#ifdef CONFIG_SECURITY_NETWORK
-static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
-{
- char *secdata;
- u32 seclen;
- int err;
-
- if (test_bit(SOCK_PASSSEC, &sock->flags)) {
- err = security_secid_to_secctx(scm->secid, &secdata, &seclen);
-
- if (!err) {
- put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, seclen, secdata);
- security_release_secctx(secdata, seclen);
- }
- }
-}
-
-static inline bool scm_has_secdata(struct socket *sock)
-{
- return test_bit(SOCK_PASSSEC, &sock->flags);
-}
-#else
-static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
-{ }
-
-static inline bool scm_has_secdata(struct socket *sock)
-{
- return false;
-}
-#endif /* CONFIG_SECURITY_NETWORK */
-
-static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
-{
- struct file *pidfd_file = NULL;
- int len, pidfd;
-
- /* put_cmsg() doesn't return an error if CMSG is truncated,
- * that's why we need to opencode these checks here.
- */
- if (msg->msg_flags & MSG_CMSG_COMPAT)
- len = sizeof(struct compat_cmsghdr) + sizeof(int);
- else
- len = sizeof(struct cmsghdr) + sizeof(int);
-
- if (msg->msg_controllen < len) {
- msg->msg_flags |= MSG_CTRUNC;
- return;
- }
-
- if (!scm->pid)
- return;
-
- pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
-
- if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
- if (pidfd_file) {
- put_unused_fd(pidfd);
- fput(pidfd_file);
- }
-
- return;
- }
-
- if (pidfd_file)
- fd_install(pidfd, pidfd_file);
-}
-
-static inline bool __scm_recv_common(struct socket *sock, struct msghdr *msg,
- struct scm_cookie *scm, int flags)
-{
- if (!msg->msg_control) {
- if (test_bit(SOCK_PASSCRED, &sock->flags) ||
- test_bit(SOCK_PASSPIDFD, &sock->flags) ||
- scm->fp || scm_has_secdata(sock))
- msg->msg_flags |= MSG_CTRUNC;
- scm_destroy(scm);
- return false;
- }
-
- if (test_bit(SOCK_PASSCRED, &sock->flags)) {
- struct user_namespace *current_ns = current_user_ns();
- struct ucred ucreds = {
- .pid = scm->creds.pid,
- .uid = from_kuid_munged(current_ns, scm->creds.uid),
- .gid = from_kgid_munged(current_ns, scm->creds.gid),
- };
- put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
- }
-
- scm_passec(sock, msg, scm);
-
- if (scm->fp)
- scm_detach_fds(msg, scm);
-
- return true;
-}
-
-static inline void scm_recv(struct socket *sock, struct msghdr *msg,
- struct scm_cookie *scm, int flags)
-{
- if (!__scm_recv_common(sock, msg, scm, flags))
- return;
-
- scm_destroy_cred(scm);
-}
-
-static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
- struct scm_cookie *scm, int flags)
-{
- if (!__scm_recv_common(sock, msg, scm, flags))
- return;
-
- if (test_bit(SOCK_PASSPIDFD, &sock->flags))
- scm_pidfd_recv(msg, scm);
-
- scm_destroy_cred(scm);
-}
+void scm_recv(struct socket *sock, struct msghdr *msg,
+ struct scm_cookie *scm, int flags);
+void scm_recv_unix(struct socket *sock, struct msghdr *msg,
+ struct scm_cookie *scm, int flags);
static inline int scm_recv_one_fd(struct file *f, int __user *ufd,
unsigned int flags)
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index f514a0aa849e..654d37ec0402 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -15,8 +15,6 @@
* Dinakaran Joseph
* Jon Grimm <jgrimm@us.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Rewritten to use libcrc32c by:
* Vlad Yasevich <vladislav.yasevich@hp.com>
*/
@@ -25,42 +23,18 @@
#include <linux/types.h>
#include <linux/sctp.h>
-#include <linux/crc32c.h>
-#include <linux/crc32.h>
-
-static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum)
-{
- /* This uses the crypto implementation of crc32c, which is either
- * implemented w/ hardware support or resolves to __crc32c_le().
- */
- return (__force __wsum)crc32c((__force __u32)sum, buff, len);
-}
-
-static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
- int offset, int len)
-{
- return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
- (__force __u32)csum2, len);
-}
-
-static const struct skb_checksum_ops sctp_csum_ops = {
- .update = sctp_csum_update,
- .combine = sctp_csum_combine,
-};
static inline __le32 sctp_compute_cksum(const struct sk_buff *skb,
unsigned int offset)
{
struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
__le32 old = sh->checksum;
- __wsum new;
+ u32 new;
sh->checksum = 0;
- new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0,
- &sctp_csum_ops);
+ new = ~skb_crc32c(skb, offset, skb->len - offset, ~0);
sh->checksum = old;
-
- return cpu_to_le32((__force __u32)new);
+ return cpu_to_le32(new);
}
#endif /* __sctp_checksum_h__ */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 84e6b9fd5610..e96d1bd087f6 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -364,8 +364,6 @@ sctp_assoc_to_state(const struct sctp_association *asoc)
/* Look up the association by its id. */
struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id);
-int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp);
-
/* A macro to walk a list of skbs. */
#define sctp_skb_for_each(pos, head, tmp) \
skb_queue_walk_safe(head, pos, tmp)
@@ -636,7 +634,7 @@ static inline void sctp_transport_pl_reset(struct sctp_transport *t)
}
} else {
if (t->pl.state != SCTP_PL_DISABLED) {
- if (del_timer(&t->probe_timer))
+ if (timer_delete(&t->probe_timer))
sctp_transport_put(t);
t->pl.state = SCTP_PL_DISABLED;
}
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 64c42bd56bb2..3bfd261a53cc 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -161,7 +161,6 @@ const struct sctp_sm_table_entry *sctp_sm_lookup_event(
enum sctp_event_type event_type,
enum sctp_state state,
union sctp_subtype event_subtype);
-int sctp_chunk_iif(const struct sctp_chunk *);
struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *,
struct sctp_chunk *,
gfp_t gfp);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 31248cfdfb23..1ad7ce71d0a7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -775,6 +775,7 @@ struct sctp_transport {
/* Reference counting. */
refcount_t refcnt;
+ __u32 dead:1,
/* RTO-Pending : A flag used to track if one of the DATA
* chunks sent to this address is currently being
* used to compute a RTT. If this flag is 0,
@@ -784,7 +785,7 @@ struct sctp_transport {
* calculation completes (i.e. the DATA chunk
* is SACK'd) clear this flag.
*/
- __u32 rto_pending:1,
+ rto_pending:1,
/*
* hb_sent : a flag that signals that we have a pending
@@ -2151,8 +2152,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *,
const union sctp_addr *address,
const gfp_t gfp,
const int peer_state);
-void sctp_assoc_del_peer(struct sctp_association *asoc,
- const union sctp_addr *addr);
void sctp_assoc_rm_peer(struct sctp_association *asoc,
struct sctp_transport *peer);
void sctp_assoc_control_transport(struct sctp_association *asoc,
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index 21e7fa2a1813..cddebafb9f77 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -16,9 +16,5 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport);
u32 secure_tcpv6_ts_off(const struct net *net,
const __be32 *saddr, const __be32 *daddr);
-u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport);
-u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
- __be16 sport, __be16 dport);
#endif /* _NET_SECURE_SEQ */
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 468a67836e2f..4cb4326dfebe 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -159,7 +159,7 @@ struct linux_tls_mib {
#define __SNMP_ADD_STATS64(mib, field, addend) \
do { \
- __typeof__(*mib) *ptr = raw_cpu_ptr(mib); \
+ TYPEOF_UNQUAL(*mib) *ptr = raw_cpu_ptr(mib); \
u64_stats_update_begin(&ptr->syncp); \
ptr->mibs[field] += addend; \
u64_stats_update_end(&ptr->syncp); \
@@ -176,8 +176,7 @@ struct linux_tls_mib {
#define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
#define __SNMP_UPD_PO_STATS64(mib, basefield, addend) \
do { \
- __typeof__(*mib) *ptr; \
- ptr = raw_cpu_ptr((mib)); \
+ TYPEOF_UNQUAL(*mib) *ptr = raw_cpu_ptr(mib); \
u64_stats_update_begin(&ptr->syncp); \
ptr->mibs[basefield##PKTS]++; \
ptr->mibs[basefield##OCTETS] += addend; \
diff --git a/include/net/sock.h b/include/net/sock.h
index d3efb581c2ff..e3ab20345685 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -303,6 +303,7 @@ struct sk_filter;
* @sk_stamp: time stamp of last packet received
* @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
* @sk_tsflags: SO_TIMESTAMPING flags
+ * @sk_bpf_cb_flags: used in bpf_setsockopt()
* @sk_use_task_frag: allow sk_page_frag() to use current->task_frag.
* Sockets that can be used under memory reclaim should
* set this to false.
@@ -336,8 +337,16 @@ struct sk_filter;
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
+ * @sk_scm_recv_flags: all flags used by scm_recv()
+ * @sk_scm_credentials: flagged by SO_PASSCRED to recv SCM_CREDENTIALS
+ * @sk_scm_security: flagged by SO_PASSSEC to recv SCM_SECURITY
+ * @sk_scm_pidfd: flagged by SO_PASSPIDFD to recv SCM_PIDFD
+ * @sk_scm_rights: flagged by SO_PASSRIGHTS to recv SCM_RIGHTS
+ * @sk_scm_unused: unused flags for scm_recv()
* @ns_tracker: tracker for netns reference
* @sk_user_frags: xarray of pages the user is holding a reference on.
+ * @sk_owner: reference to the real owner of the socket that calls
+ * sock_lock_init_class_and_name().
*/
struct sock {
/*
@@ -520,11 +529,23 @@ struct sock {
#endif
int sk_disconnects;
- u8 sk_txrehash;
+ union {
+ u8 sk_txrehash;
+ u8 sk_scm_recv_flags;
+ struct {
+ u8 sk_scm_credentials : 1,
+ sk_scm_security : 1,
+ sk_scm_pidfd : 1,
+ sk_scm_rights : 1,
+ sk_scm_unused : 4;
+ };
+ };
u8 sk_clockid;
u8 sk_txtime_deadline_mode : 1,
sk_txtime_report_errors : 1,
sk_txtime_unused : 6;
+#define SK_BPF_CB_FLAG_TEST(SK, FLAG) ((SK)->sk_bpf_cb_flags & (FLAG))
+ u8 sk_bpf_cb_flags;
void *sk_user_data;
#ifdef CONFIG_SECURITY
@@ -544,6 +565,10 @@ struct sock {
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
struct xarray sk_user_frags;
+
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+ struct module *sk_owner;
+#endif
};
struct sock_bh_locked {
@@ -953,6 +978,8 @@ enum sock_flags {
SOCK_XDP, /* XDP is attached */
SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */
+ SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */
+ SOCK_TIMESTAMPING_ANY, /* Copy of sk_tsflags & TSFLAGS_ANY */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1283,10 +1310,6 @@ struct proto {
unsigned int inuse_idx;
#endif
-#if IS_ENABLED(CONFIG_MPTCP)
- int (*forward_alloc_get)(const struct sock *sk);
-#endif
-
bool (*stream_memory_free)(const struct sock *sk, int wake);
bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
@@ -1347,15 +1370,6 @@ int sock_load_diag_module(int family, int protocol);
INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
-static inline int sk_forward_alloc_get(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_MPTCP)
- if (sk->sk_prot->forward_alloc_get)
- return sk->sk_prot->forward_alloc_get(sk);
-#endif
- return READ_ONCE(sk->sk_forward_alloc);
-}
-
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
@@ -1591,6 +1605,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
sk_mem_reclaim(sk);
}
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+ __module_get(owner);
+ sk->sk_owner = owner;
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+ sk->sk_owner = NULL;
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+ module_put(sk->sk_owner);
+}
+#else
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+}
+#endif
/*
* Macro so as to not evaluate some arguments when
* lockdep is not enabled.
@@ -1600,13 +1643,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
*/
#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \
do { \
+ sk_owner_set(sk, THIS_MODULE); \
sk->sk_lock.owned = 0; \
init_waitqueue_head(&sk->sk_lock.wq); \
spin_lock_init(&(sk)->sk_lock.slock); \
debug_check_no_locks_freed((void *)&(sk)->sk_lock, \
- sizeof((sk)->sk_lock)); \
+ sizeof((sk)->sk_lock)); \
lockdep_set_class_and_name(&(sk)->sk_lock.slock, \
- (skey), (sname)); \
+ (skey), (sname)); \
lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
} while (0)
@@ -1753,7 +1797,6 @@ void sk_free(struct sock *sk);
void sk_net_refcnt_upgrade(struct sock *sk);
void sk_destruct(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
-void sk_free_unlock_clone(struct sock *sk);
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority);
@@ -1805,6 +1848,8 @@ static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk,
}
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority);
+void *sock_kmemdup(struct sock *sk, const void *src,
+ int size, gfp_t priority);
void sock_kfree_s(struct sock *sk, void *mem, int size);
void sock_kzfree_s(struct sock *sk, void *mem, int size);
void sk_send_sigurg(struct sock *sk);
@@ -1821,13 +1866,17 @@ struct sockcm_cookie {
u32 mark;
u32 tsflags;
u32 ts_opt_id;
+ u32 priority;
+ u32 dmabuf_id;
};
static inline void sockcm_init(struct sockcm_cookie *sockc,
const struct sock *sk)
{
*sockc = (struct sockcm_cookie) {
- .tsflags = READ_ONCE(sk->sk_tsflags)
+ .mark = READ_ONCE(sk->sk_mark),
+ .tsflags = READ_ONCE(sk->sk_tsflags),
+ .priority = READ_ONCE(sk->sk_priority),
};
}
@@ -2027,6 +2076,7 @@ static inline void sock_orphan(struct sock *sk)
sock_set_flag(sk, SOCK_DEAD);
sk_set_socket(sk, NULL);
sk->sk_wq = NULL;
+ /* Note: sk_uid is unchanged. */
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -2037,18 +2087,25 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
rcu_assign_pointer(sk->sk_wq, &parent->wq);
parent->sk = sk;
sk_set_socket(sk, parent);
- sk->sk_uid = SOCK_INODE(parent)->i_uid;
+ WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
security_sock_graft(sk, parent);
write_unlock_bh(&sk->sk_callback_lock);
}
kuid_t sock_i_uid(struct sock *sk);
+
+static inline kuid_t sk_uid(const struct sock *sk)
+{
+ /* Paired with WRITE_ONCE() in sockfs_setattr() */
+ return READ_ONCE(sk->sk_uid);
+}
+
unsigned long __sock_i_ino(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
{
- return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
+ return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0);
}
static inline u32 net_tx_rndhash(void)
@@ -2572,8 +2629,8 @@ struct sock_skb_cb {
* using skb->cb[] would keep using it directly and utilize its
* alignment guarantee.
*/
-#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \
- sizeof(struct sock_skb_cb)))
+#define SOCK_SKB_CB_OFFSET (sizeof_field(struct sk_buff, cb) - \
+ sizeof(struct sock_skb_cb))
#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
SOCK_SKB_CB_OFFSET))
@@ -2662,12 +2719,13 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
{
#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_RCVTSTAMP) | \
- (1UL << SOCK_RCVMARK))
+ (1UL << SOCK_RCVMARK) | \
+ (1UL << SOCK_RCVPRIORITY) | \
+ (1UL << SOCK_TIMESTAMPING_ANY))
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_RECV_CMSGS ||
- READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+ if (READ_ONCE(sk->sk_flags) & FLAGS_RECV_CMSGS)
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
@@ -2702,8 +2760,6 @@ static inline void _sock_tx_timestamp(struct sock *sk,
*tskey = atomic_inc_return(&sk->sk_tskey) - 1;
}
}
- if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
- *tx_flags |= SKBTX_WIFI_STATUS;
}
static inline void sock_tx_timestamp(struct sock *sk,
@@ -2741,9 +2797,14 @@ static inline bool sk_is_udp(const struct sock *sk)
sk->sk_protocol == IPPROTO_UDP;
}
+static inline bool sk_is_unix(const struct sock *sk)
+{
+ return sk->sk_family == AF_UNIX;
+}
+
static inline bool sk_is_stream_unix(const struct sock *sk)
{
- return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM;
+ return sk_is_unix(sk) && sk->sk_type == SOCK_STREAM;
}
static inline bool sk_is_vsock(const struct sock *sk)
@@ -2751,6 +2812,13 @@ static inline bool sk_is_vsock(const struct sock *sk)
return sk->sk_family == AF_VSOCK;
}
+static inline bool sk_may_scm_recv(const struct sock *sk)
+{
+ return (IS_ENABLED(CONFIG_UNIX) && sk->sk_family == AF_UNIX) ||
+ sk->sk_family == AF_NETLINK ||
+ (IS_ENABLED(CONFIG_BT) && sk->sk_family == AF_BLUETOOTH);
+}
+
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
@@ -2790,6 +2858,12 @@ sk_is_refcounted(struct sock *sk)
return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
}
+static inline bool
+sk_requests_wifi_status(struct sock *sk)
+{
+ return sk && sk_fullsock(sk) && sock_flag(sk, SOCK_WIFI_STATUS);
+}
+
/* Checks if this SKB belongs to an HW offloaded socket
* and whether any SW fallbacks are required based on dev.
* Check decrypted mark in case skb_orphan() cleared socket.
@@ -2917,6 +2991,13 @@ int sock_set_timestamping(struct sock *sk, int optname,
struct so_timestamping timestamping);
void sock_enable_timestamps(struct sock *sk);
+#if defined(CONFIG_CGROUP_BPF)
+void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op);
+#else
+static inline void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op)
+{
+}
+#endif
void sock_no_linger(struct sock *sk);
void sock_set_keepalive(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
@@ -2937,8 +3018,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
static inline bool sk_is_readable(struct sock *sk)
{
- if (sk->sk_prot->sock_is_readable)
- return sk->sk_prot->sock_is_readable(sk);
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot->sock_is_readable)
+ return prot->sock_is_readable(sk);
+
return false;
}
#endif /* _SOCK_H */
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 41e2ce9e9e10..0ed73e364faa 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -43,6 +43,8 @@ struct strparser;
struct strp_callbacks {
int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+ int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
int (*read_sock_done)(struct strparser *strp, int err);
void (*abort_parser)(struct strparser *strp, int err);
void (*lock)(struct strparser *strp);
@@ -112,8 +114,6 @@ static inline void strp_pause(struct strparser *strp)
/* May be called without holding lock for attached socket */
void strp_unpause(struct strparser *strp);
-/* Must be called with process lock held (lock_sock) */
-void __strp_unpause(struct strparser *strp);
static inline void save_strp_stats(struct strparser *strp,
struct strp_aggr_stats *agg_stats)
diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h
index f071c1d70a25..a04bcac7adf4 100644
--- a/include/net/tc_act/tc_ctinfo.h
+++ b/include/net/tc_act/tc_ctinfo.h
@@ -18,9 +18,9 @@ struct tcf_ctinfo_params {
struct tcf_ctinfo {
struct tc_action common;
struct tcf_ctinfo_params __rcu *params;
- u64 stats_dscp_set;
- u64 stats_dscp_error;
- u64 stats_cpmark_set;
+ atomic64_t stats_dscp_set;
+ atomic64_t stats_dscp_error;
+ atomic64_t stats_cpmark_set;
};
enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bc04599547c3..5078ad868fee 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -26,6 +26,7 @@
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/bits.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -144,8 +145,9 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
#define TCP_DELACK_MIN 4U
#define TCP_ATO_MIN 4U
#endif
-#define TCP_RTO_MAX ((unsigned)(120*HZ))
-#define TCP_RTO_MIN ((unsigned)(HZ/5))
+#define TCP_RTO_MAX_SEC 120
+#define TCP_RTO_MAX ((unsigned)(TCP_RTO_MAX_SEC * HZ))
+#define TCP_RTO_MIN ((unsigned)(HZ / 5))
#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
@@ -372,26 +374,64 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
}
}
-#define TCP_ECN_OK 1
-#define TCP_ECN_QUEUE_CWR 2
-#define TCP_ECN_DEMAND_CWR 4
-#define TCP_ECN_SEEN 8
+#define TCP_ECN_MODE_RFC3168 BIT(0)
+#define TCP_ECN_QUEUE_CWR BIT(1)
+#define TCP_ECN_DEMAND_CWR BIT(2)
+#define TCP_ECN_SEEN BIT(3)
+#define TCP_ECN_MODE_ACCECN BIT(4)
+
+#define TCP_ECN_DISABLED 0
+#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
+#define TCP_ECN_MODE_ANY (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
+
+static inline bool tcp_ecn_mode_any(const struct tcp_sock *tp)
+{
+ return tp->ecn_flags & TCP_ECN_MODE_ANY;
+}
+
+static inline bool tcp_ecn_mode_rfc3168(const struct tcp_sock *tp)
+{
+ return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_RFC3168;
+}
+
+static inline bool tcp_ecn_mode_accecn(const struct tcp_sock *tp)
+{
+ return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_ACCECN;
+}
+
+static inline bool tcp_ecn_disabled(const struct tcp_sock *tp)
+{
+ return !tcp_ecn_mode_any(tp);
+}
+
+static inline bool tcp_ecn_mode_pending(const struct tcp_sock *tp)
+{
+ return (tp->ecn_flags & TCP_ECN_MODE_PENDING) == TCP_ECN_MODE_PENDING;
+}
+
+static inline void tcp_ecn_mode_set(struct tcp_sock *tp, u8 mode)
+{
+ tp->ecn_flags &= ~TCP_ECN_MODE_ANY;
+ tp->ecn_flags |= mode;
+}
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
TCP_TW_RST = 1,
TCP_TW_ACK = 2,
- TCP_TW_SYN = 3
+ TCP_TW_SYN = 3,
+ TCP_TW_ACK_OOW = 4
};
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
const struct tcphdr *th,
- u32 *tw_isn);
+ u32 *tw_isn,
+ enum skb_drop_reason *drop_reason);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
- bool *lost_race);
+ bool *lost_race, enum skb_drop_reason *drop_reason);
enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
@@ -416,6 +456,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
+void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
@@ -667,7 +708,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
enum sk_rst_reason reason);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
-void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
@@ -743,6 +784,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
void tcp_read_done(struct sock *sk, size_t len);
@@ -753,10 +797,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
+static inline unsigned int tcp_rto_max(const struct sock *sk)
+{
+ return READ_ONCE(inet_csk(sk)->icsk_rto_max);
+}
+
static inline void tcp_bound_rto(struct sock *sk)
{
- if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
- inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+ inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk));
}
static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
@@ -797,7 +845,7 @@ u32 tcp_delack_max(const struct sock *sk);
static inline u32 tcp_rto_min(const struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = inet_csk(sk)->icsk_rto_min;
+ u32 rto_min = READ_ONCE(inet_csk(sk)->icsk_rto_min);
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -925,15 +973,22 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
-#define TCPHDR_FIN 0x01
-#define TCPHDR_SYN 0x02
-#define TCPHDR_RST 0x04
-#define TCPHDR_PSH 0x08
-#define TCPHDR_ACK 0x10
-#define TCPHDR_URG 0x20
-#define TCPHDR_ECE 0x40
-#define TCPHDR_CWR 0x80
-
+#define TCPHDR_FIN BIT(0)
+#define TCPHDR_SYN BIT(1)
+#define TCPHDR_RST BIT(2)
+#define TCPHDR_PSH BIT(3)
+#define TCPHDR_ACK BIT(4)
+#define TCPHDR_URG BIT(5)
+#define TCPHDR_ECE BIT(6)
+#define TCPHDR_CWR BIT(7)
+#define TCPHDR_AE BIT(8)
+#define TCPHDR_FLAGS_MASK (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+ TCPHDR_PSH | TCPHDR_ACK | TCPHDR_URG | \
+ TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
+#define tcp_flags_ntohs(th) (ntohs(*(__be16 *)&tcp_flag_word(th)) & \
+ TCPHDR_FLAGS_MASK)
+
+#define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
/* State flags for sacked in struct tcp_skb_cb */
@@ -968,14 +1023,16 @@ struct tcp_skb_cb {
u16 tcp_gso_size;
};
};
- __u8 tcp_flags; /* TCP header flags. (tcp[13]) */
+ __u16 tcp_flags; /* TCP header flags (tcp[12-13])*/
__u8 sacked; /* State flags for SACK. */
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
- __u8 txstamp_ack:1, /* Record TX timestamp for ack? */
+#define TSTAMP_ACK_SK 0x1
+#define TSTAMP_ACK_BPF 0x2
+ __u8 txstamp_ack:2, /* Record TX timestamp for ack? */
eor:1, /* Is skb MSG_EOR marked? */
has_rxtstamp:1, /* SKB has a RX timestamp */
- unused:5;
+ unused:4;
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
@@ -1121,9 +1178,9 @@ enum tcp_ca_ack_event_flags {
#define TCP_CA_UNSPEC 0
/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
-#define TCP_CONG_NON_RESTRICTED 0x1
+#define TCP_CONG_NON_RESTRICTED BIT(0)
/* Requires ECN/ECT set on all packets */
-#define TCP_CONG_NEEDS_ECN 0x2
+#define TCP_CONG_NEEDS_ECN BIT(1)
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
union tcp_cc_info;
@@ -1437,10 +1494,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk)
static inline void tcp_reset_xmit_timer(struct sock *sk,
const int what,
unsigned long when,
- const unsigned long max_when)
+ bool pace_delay)
{
- inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
- max_when);
+ if (pace_delay)
+ when += tcp_pacing_delay(sk);
+ inet_csk_reset_xmit_timer(sk, what, when,
+ tcp_rto_max(sk));
}
/* Something is really bad, we could not queue an additional packet,
@@ -1469,7 +1528,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
{
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- tcp_probe0_base(sk), TCP_RTO_MAX);
+ tcp_probe0_base(sk), true);
}
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
@@ -1831,7 +1890,7 @@ int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
* @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
* @c: returned tcp_sigpool for usage (uninitialized on failure)
*
- * Returns 0 on success, error otherwise.
+ * Returns: 0 on success, error otherwise.
*/
int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
/**
@@ -2585,8 +2644,8 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
- int (*get_info)(struct sock *sk, struct sk_buff *skb);
- size_t (*get_info_size)(const struct sock *sk);
+ int (*get_info)(struct sock *sk, struct sk_buff *skb, bool net_admin);
+ size_t (*get_info_size)(const struct sock *sk, bool net_admin);
/* clone ulp */
void (*clone)(const struct request_sock *req, struct sock *newsk,
const gfp_t priority);
@@ -2613,6 +2672,11 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct strparser;
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
+#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_INET
@@ -2663,6 +2727,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
if (sk_fullsock(sk)) {
sock_ops.is_fullsock = 1;
+ sock_ops.is_locked_tcp_sock = 1;
sock_owned_by_me(sk);
}
@@ -2751,9 +2816,9 @@ extern struct static_key_false tcp_have_smc;
#endif
#if IS_ENABLED(CONFIG_TLS_DEVICE)
-void clean_acked_data_enable(struct inet_connection_sock *icsk,
+void clean_acked_data_enable(struct tcp_sock *tp,
void (*cad)(struct sock *sk, u32 ack_seq));
-void clean_acked_data_disable(struct inet_connection_sock *icsk);
+void clean_acked_data_disable(struct tcp_sock *tp);
void clean_acked_data_flush(void);
#endif
diff --git a/include/net/tls.h b/include/net/tls.h
index 61fef2880114..857340338b69 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -59,6 +59,8 @@ struct tls_rec;
#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type)
+#define TLS_HANDSHAKE_KEYUPDATE 24 /* rfc8446 B.3: Key update */
+
#define TLS_AAD_SPACE_SIZE 13
#define TLS_MAX_IV_SIZE 16
@@ -130,6 +132,7 @@ struct tls_sw_context_rx {
u8 async_capable:1;
u8 zc_capable:1;
u8 reader_contended:1;
+ bool key_update_pending;
struct tls_strparser strp;
diff --git a/include/net/udp.h b/include/net/udp.h
index 6e89520e100d..7a4524243b19 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -290,6 +290,7 @@ static inline void udp_lib_init_sock(struct sock *sk)
struct udp_sock *up = udp_sk(sk);
skb_queue_head_init(&up->reader_queue);
+ INIT_HLIST_NODE(&up->tunnel_list);
up->forward_threshold = sk->sk_rcvbuf >> 2;
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
}
@@ -586,6 +587,16 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
{
netdev_features_t features = NETIF_F_SG;
struct sk_buff *segs;
+ int drop_count;
+
+ /*
+ * Segmentation in UDP receive path is only for UDP GRO, drop udp
+ * fragmentation offload (UFO) packets.
+ */
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+ drop_count = 1;
+ goto drop;
+ }
/* Avoid csum recalculation by skb_segment unless userspace explicitly
* asks for the final checksum values
@@ -609,16 +620,18 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
*/
segs = __skb_gso_segment(skb, features, false);
if (IS_ERR_OR_NULL(segs)) {
- int segs_nr = skb_shinfo(skb)->gso_segs;
-
- atomic_add(segs_nr, &sk->sk_drops);
- SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr);
- kfree_skb(skb);
- return NULL;
+ drop_count = skb_shinfo(skb)->gso_segs;
+ goto drop;
}
consume_skb(skb);
return segs;
+
+drop:
+ atomic_add(drop_count, &sk->sk_drops);
+ SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count);
+ kfree_skb(skb);
+ return NULL;
}
static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index a93dc51f6323..2df3b8344eb5 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -191,6 +191,21 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
}
#endif
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add);
+void udp_tunnel_update_gro_rcv(struct sock *sk, bool add);
+#else
+static inline void udp_tunnel_update_gro_lookup(struct net *net,
+ struct sock *sk, bool add) {}
+static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {}
+#endif
+
+static inline void udp_tunnel_cleanup_gro(struct sock *sk)
+{
+ udp_tunnel_update_gro_rcv(sk, false);
+ udp_tunnel_update_gro_lookup(sock_net(sk), sk, false);
+}
+
static inline void udp_tunnel_encap_enable(struct sock *sk)
{
if (udp_test_and_set_bit(ENCAP_ENABLED, sk))
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 33ba6fc151cf..e2f7ca045d3e 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -227,6 +227,7 @@ struct vxlan_config {
unsigned int addrmax;
bool no_share;
enum ifla_vxlan_df df;
+ struct vxlanhdr reserved_bits;
};
enum {
@@ -295,7 +296,7 @@ struct vxlan_dev {
struct vxlan_rdst default_dst; /* default destination */
struct timer_list age_timer;
- spinlock_t hash_lock[FDB_HASH_SIZE];
+ spinlock_t hash_lock;
unsigned int addrcnt;
struct gro_cells gro_cells;
@@ -303,9 +304,10 @@ struct vxlan_dev {
struct vxlan_vni_group __rcu *vnigrp;
- struct hlist_head fdb_head[FDB_HASH_SIZE];
+ struct rhashtable fdb_hash_tbl;
struct rhashtable mdb_tbl;
+ struct hlist_head fdb_list;
struct hlist_head mdb_list;
unsigned int mdb_seq;
};
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b5b10f2b88e5..b40f1f96cb11 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -11,6 +11,8 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h> /* skb_shared_info */
+#include <net/page_pool/types.h>
+
/**
* DOC: XDP RX-queue information
*
@@ -87,7 +89,7 @@ struct xdp_buff {
u32 flags; /* supported values defined in xdp_buff_flags */
};
-static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
}
@@ -102,7 +104,8 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
}
-static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+static __always_inline bool
+xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}
@@ -143,15 +146,16 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
static inline struct skb_shared_info *
-xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
+xdp_get_shared_info_from_buff(const struct xdp_buff *xdp)
{
return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}
-static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+static __always_inline unsigned int
+xdp_get_buff_len(const struct xdp_buff *xdp)
{
unsigned int len = xdp->data_end - xdp->data;
- struct skb_shared_info *sinfo;
+ const struct skb_shared_info *sinfo;
if (likely(!xdp_buff_has_frags(xdp)))
goto out;
@@ -162,26 +166,114 @@ out:
return len;
}
+void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp);
+
+/**
+ * __xdp_buff_add_frag - attach frag to &xdp_buff
+ * @xdp: XDP buffer to attach the frag to
+ * @netmem: network memory containing the frag
+ * @offset: offset at which the frag starts
+ * @size: size of the frag
+ * @truesize: total memory size occupied by the frag
+ * @try_coalesce: whether to try coalescing the frags (not valid for XSk)
+ *
+ * Attach frag to the XDP buffer. If it currently has no frags attached,
+ * initialize the related fields, otherwise check that the frag number
+ * didn't reach the limit of ``MAX_SKB_FRAGS``. If possible, try coalescing
+ * the frag with the previous one.
+ * The function doesn't check/update the pfmemalloc bit. Please use the
+ * non-underscored wrapper in drivers.
+ *
+ * Return: true on success, false if there's no space for the frag in
+ * the shared info struct.
+ */
+static inline bool __xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem,
+ u32 offset, u32 size, u32 truesize,
+ bool try_coalesce)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ skb_frag_t *prev;
+ u32 nr_frags;
+
+ if (!xdp_buff_has_frags(xdp)) {
+ xdp_buff_set_frags_flag(xdp);
+
+ nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ sinfo->xdp_frags_truesize = 0;
+
+ goto fill;
+ }
+
+ nr_frags = sinfo->nr_frags;
+ prev = &sinfo->frags[nr_frags - 1];
+
+ if (try_coalesce && netmem == skb_frag_netmem(prev) &&
+ offset == skb_frag_off(prev) + skb_frag_size(prev)) {
+ skb_frag_size_add(prev, size);
+ /* Guaranteed to only decrement the refcount */
+ xdp_return_frag(netmem, xdp);
+ } else if (unlikely(nr_frags == MAX_SKB_FRAGS)) {
+ return false;
+ } else {
+fill:
+ __skb_fill_netmem_desc_noacc(sinfo, nr_frags++, netmem,
+ offset, size);
+ }
+
+ sinfo->nr_frags = nr_frags;
+ sinfo->xdp_frags_size += size;
+ sinfo->xdp_frags_truesize += truesize;
+
+ return true;
+}
+
+/**
+ * xdp_buff_add_frag - attach frag to &xdp_buff
+ * @xdp: XDP buffer to attach the frag to
+ * @netmem: network memory containing the frag
+ * @offset: offset at which the frag starts
+ * @size: size of the frag
+ * @truesize: total memory size occupied by the frag
+ *
+ * Version of __xdp_buff_add_frag() which takes care of the pfmemalloc bit.
+ *
+ * Return: true on success, false if there's no space for the frag in
+ * the shared info struct.
+ */
+static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem,
+ u32 offset, u32 size, u32 truesize)
+{
+ if (!__xdp_buff_add_frag(xdp, netmem, offset, size, truesize, true))
+ return false;
+
+ if (unlikely(netmem_is_pfmemalloc(netmem)))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+
+ return true;
+}
+
struct xdp_frame {
void *data;
- u16 len;
- u16 headroom;
+ u32 len;
+ u32 headroom;
u32 metasize; /* uses lower 8-bits */
/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
- * while mem info is valid on remote CPU.
+ * while mem_type is valid on remote CPU.
*/
- struct xdp_mem_info mem;
+ enum xdp_mem_type mem_type:32;
struct net_device *dev_rx; /* used by cpumap */
u32 frame_sz;
u32 flags; /* supported values defined in xdp_buff_flags */
};
-static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame)
{
return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
}
-static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+static __always_inline bool
+xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame)
{
return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}
@@ -189,18 +281,16 @@ static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame
#define XDP_BULK_QUEUE_SIZE 16
struct xdp_frame_bulk {
int count;
- void *xa;
- void *q[XDP_BULK_QUEUE_SIZE];
+ netmem_ref q[XDP_BULK_QUEUE_SIZE];
};
static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
{
- /* bq->count will be zero'ed when bq->xa gets updated */
- bq->xa = NULL;
+ bq->count = 0;
}
static inline struct skb_shared_info *
-xdp_get_shared_info_from_frame(struct xdp_frame *frame)
+xdp_get_shared_info_from_frame(const struct xdp_frame *frame)
{
void *data_hard_start = frame->data - frame->headroom - sizeof(*frame);
@@ -226,7 +316,14 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
unsigned int size, unsigned int truesize,
bool pfmemalloc)
{
- skb_shinfo(skb)->nr_frags = nr_frags;
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ sinfo->nr_frags = nr_frags;
+ /*
+ * ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``,
+ * reset it after that these fields aren't used anymore.
+ */
+ sinfo->destructor_arg = NULL;
skb->len += size;
skb->data_len += size;
@@ -238,17 +335,19 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
+struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
+struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp);
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev);
struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct net_device *dev);
-int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
static inline
-void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
+void xdp_convert_frame_to_buff(const struct xdp_frame *frame,
+ struct xdp_buff *xdp)
{
xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame);
xdp->data = frame->data;
@@ -259,7 +358,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
}
static inline
-int xdp_update_frame_from_buff(struct xdp_buff *xdp,
+int xdp_update_frame_from_buff(const struct xdp_buff *xdp,
struct xdp_frame *xdp_frame)
{
int metasize, headroom;
@@ -301,24 +400,33 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
return NULL;
- /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
- xdp_frame->mem = xdp->rxq->mem;
+ /* rxq only valid until napi_schedule ends, convert to xdp_mem_type */
+ xdp_frame->mem_type = xdp->rxq->mem.type;
return xdp_frame;
}
-void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
- struct xdp_buff *xdp);
+void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
+ bool napi_direct, struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
-void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
-static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
+static inline void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
+{
+ if (unlikely(!bq->count))
+ return;
+
+ page_pool_put_netmem_bulk(bq->q, bq->count);
+ bq->count = 0;
+}
+
+static __always_inline unsigned int
+xdp_get_frame_len(const struct xdp_frame *xdpf)
{
- struct skb_shared_info *sinfo;
+ const struct skb_shared_info *sinfo;
unsigned int len = xdpf->len;
if (likely(!xdp_frame_has_frags(xdpf)))
@@ -350,6 +458,38 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
int xdp_reg_mem_model(struct xdp_mem_info *mem,
enum xdp_mem_type type, void *allocator);
void xdp_unreg_mem_model(struct xdp_mem_info *mem);
+int xdp_reg_page_pool(struct page_pool *pool);
+void xdp_unreg_page_pool(const struct page_pool *pool);
+void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
+ const struct page_pool *pool);
+
+/**
+ * xdp_rxq_info_attach_mem_model - attach registered mem info to RxQ info
+ * @xdp_rxq: XDP RxQ info to attach the memory info to
+ * @mem: already registered memory info
+ *
+ * If the driver registers its memory providers manually, it must use this
+ * function instead of xdp_rxq_info_reg_mem_model().
+ */
+static inline void
+xdp_rxq_info_attach_mem_model(struct xdp_rxq_info *xdp_rxq,
+ const struct xdp_mem_info *mem)
+{
+ xdp_rxq->mem = *mem;
+}
+
+/**
+ * xdp_rxq_info_detach_mem_model - detach registered mem info from RxQ info
+ * @xdp_rxq: XDP RxQ info to detach the memory info from
+ *
+ * If the driver registers its memory providers manually and then attaches it
+ * via xdp_rxq_info_attach_mem_model(), it must call this function before
+ * xdp_rxq_info_unreg().
+ */
+static inline void xdp_rxq_info_detach_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+ xdp_rxq->mem = (struct xdp_mem_info){ };
+}
/* Drivers not supporting XDP metadata can use this helper, which
* rejects any room expansion for metadata as a result.
@@ -476,8 +616,12 @@ struct xdp_metadata_ops {
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
void xdp_set_features_flag(struct net_device *dev, xdp_features_t val);
+void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val);
void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg);
+void xdp_features_set_redirect_target_locked(struct net_device *dev,
+ bool support_sg);
void xdp_features_clear_redirect_target(struct net_device *dev);
+void xdp_features_clear_redirect_target_locked(struct net_device *dev);
#else
static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index bfe625b55d55..e8bd6ddb7b12 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -71,9 +71,6 @@ struct xdp_sock {
*/
u32 tx_budget_spent;
- /* Protects generic receive. */
- spinlock_t rx_lock;
-
/* Statistics */
u64 rx_dropped;
u64 rx_queue_full;
@@ -110,11 +107,16 @@ struct xdp_sock {
* indicates position where checksumming should start.
* csum_offset indicates position where checksum should be stored.
*
+ * void (*tmo_request_launch_time)(u64 launch_time, void *priv)
+ * Called when AF_XDP frame requested launch time HW offload support.
+ * launch_time indicates the PTP time at which the device can schedule the
+ * packet for transmission.
*/
struct xsk_tx_metadata_ops {
void (*tmo_request_timestamp)(void *priv);
u64 (*tmo_fill_timestamp)(void *priv);
void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
+ void (*tmo_request_launch_time)(u64 launch_time, void *priv);
};
#ifdef CONFIG_XDP_SOCKETS
@@ -162,6 +164,11 @@ static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
if (!meta)
return;
+ if (ops->tmo_request_launch_time)
+ if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME)
+ ops->tmo_request_launch_time(meta->request.launch_time,
+ priv);
+
if (ops->tmo_request_timestamp)
if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
ops->tmo_request_timestamp(priv);
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 7a7316d9c0da..513c8e9704f6 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -92,7 +92,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return xp_alloc(pool);
}
-static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+static inline bool xsk_is_eop_desc(const struct xdp_desc *desc)
{
return !xp_mb_desc(desc);
}
@@ -127,14 +127,24 @@ out:
xp_free(xskb);
}
-static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+static inline bool xsk_buff_add_frag(struct xdp_buff *head,
+ struct xdp_buff *xdp)
{
- struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
+ const void *data = xdp->data;
+ struct xdp_buff_xsk *frag;
+
+ if (!__xdp_buff_add_frag(head, virt_to_netmem(data),
+ offset_in_page(data), xdp->data_end - data,
+ xdp->frame_sz, false))
+ return false;
+ frag = container_of(xdp, struct xdp_buff_xsk, xdp);
list_add_tail(&frag->list_node, &frag->pool->xskb_list);
+
+ return true;
}
-static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
{
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
struct xdp_buff *ret = NULL;
@@ -186,30 +196,56 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return xp_raw_get_data(pool, addr);
}
+/**
+ * xsk_buff_raw_get_ctx - get &xdp_desc context
+ * @pool: XSk buff pool desc address belongs to
+ * @addr: desc address (from userspace)
+ *
+ * Wrapper for xp_raw_get_ctx() to be used in drivers, see its kdoc for
+ * details.
+ *
+ * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata
+ * pointer, if it is present and valid (initialized to %NULL otherwise).
+ */
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return xp_raw_get_ctx(pool, addr);
+}
+
#define XDP_TXMD_FLAGS_VALID ( \
XDP_TXMD_FLAGS_TIMESTAMP | \
XDP_TXMD_FLAGS_CHECKSUM | \
+ XDP_TXMD_FLAGS_LAUNCH_TIME | \
0)
-static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+static inline bool
+xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta)
{
return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
{
struct xsk_tx_metadata *meta;
if (!pool->tx_metadata_len)
return NULL;
- meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+ meta = data - pool->tx_metadata_len;
if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
return NULL; /* no way to signal the error to the user */
return meta;
}
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ return __xsk_buff_get_metadata(pool, xp_raw_get_data(pool, addr));
+}
+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -323,7 +359,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return NULL;
}
-static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+static inline bool xsk_is_eop_desc(const struct xdp_desc *desc)
{
return false;
}
@@ -342,11 +378,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
{
}
-static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+static inline bool xsk_buff_add_frag(struct xdp_buff *head,
+ struct xdp_buff *xdp)
{
+ return false;
}
-static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
{
return NULL;
}
@@ -375,12 +413,25 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return NULL;
}
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return (struct xdp_desc_ctx){ };
+}
+
static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
{
return false;
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
+{
+ return NULL;
+}
+
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
return NULL;
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2c4eda6a8596..f3014e4f54fc 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -38,6 +38,7 @@
#define XFRM_PROTO_COMP 108
#define XFRM_PROTO_IPIP 4
#define XFRM_PROTO_IPV6 41
+#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG
#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS
@@ -146,8 +147,19 @@ enum {
};
struct xfrm_dev_offload {
+ /* The device for this offload.
+ * Device drivers should not use this directly, as that will prevent
+ * them from working with bonding device. Instead, the device passed
+ * to the add/delete callbacks should be used.
+ */
struct net_device *dev;
netdevice_tracker dev_tracker;
+ /* This is a private pointer used by the bonding driver (and eventually
+ * should be moved there). Device drivers should not use it.
+ * Protected by xfrm_state.lock AND bond.ipsec_lock in most cases,
+ * except in the .xdo_dev_state_del() flow, where only xfrm_state.lock
+ * is held.
+ */
struct net_device *real_dev;
unsigned long offload_handle;
u8 dir : 2;
@@ -213,6 +225,7 @@ struct xfrm_state {
u16 family;
xfrm_address_t saddr;
int header_len;
+ int enc_hdr_len;
int trailer_len;
u32 extra_flags;
struct xfrm_mark smark;
@@ -234,7 +247,6 @@ struct xfrm_state {
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
- struct sock __rcu *encap_sk;
/* NAT keepalive */
u32 nat_keepalive_interval; /* seconds */
@@ -303,6 +315,9 @@ struct xfrm_state {
* interpreted by xfrm_type methods. */
void *data;
u8 dir;
+
+ const struct xfrm_mode_cbs *mode_cbs;
+ void *mode_data;
};
static inline struct net *xs_net(struct xfrm_state *x)
@@ -426,7 +441,6 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo);
int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo);
void xfrm_flush_gc(void);
-void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_type {
struct module *owner;
@@ -459,6 +473,54 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
+void xfrm_set_type_offload(struct xfrm_state *x, bool try_load);
+static inline void xfrm_unset_type_offload(struct xfrm_state *x)
+{
+ if (!x->type_offload)
+ return;
+
+ module_put(x->type_offload->owner);
+ x->type_offload = NULL;
+}
+
+/**
+ * struct xfrm_mode_cbs - XFRM mode callbacks
+ * @owner: module owner or NULL
+ * @init_state: Add/init mode specific state in `xfrm_state *x`
+ * @clone_state: Copy mode specific values from `orig` to new state `x`
+ * @destroy_state: Cleanup mode specific state from `xfrm_state *x`
+ * @user_init: Process mode specific netlink attributes from user
+ * @copy_to_user: Add netlink attributes to `attrs` based on state in `x`
+ * @sa_len: Return space required to store mode specific netlink attributes
+ * @get_inner_mtu: Return avail payload space after removing encap overhead
+ * @input: Process received packet from SA using mode
+ * @output: Output given packet using mode
+ * @prepare_output: Add mode specific encapsulation to packet in skb. On return
+ * `transport_header` should point at ESP header, `network_header` should
+ * point at outer IP header and `mac_header` should opint at the
+ * protocol/nexthdr field of the outer IP.
+ *
+ * One should examine and understand the specific uses of these callbacks in
+ * xfrm for further detail on how and when these functions are called. RTSL.
+ */
+struct xfrm_mode_cbs {
+ struct module *owner;
+ int (*init_state)(struct xfrm_state *x);
+ int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig);
+ void (*destroy_state)(struct xfrm_state *x);
+ int (*user_init)(struct net *net, struct xfrm_state *x,
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack);
+ int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
+ unsigned int (*sa_len)(const struct xfrm_state *x);
+ u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
+ int (*input)(struct xfrm_state *x, struct sk_buff *skb);
+ int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
+ int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
+};
+
+int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
+void xfrm_unregister_mode_cbs(u8 mode);
static inline int xfrm_af2proto(unsigned int family)
{
@@ -853,7 +915,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
xfrm_pol_put(pols[i]);
}
-void __xfrm_state_destroy(struct xfrm_state *, bool);
+void __xfrm_state_destroy(struct xfrm_state *);
static inline void __xfrm_state_put(struct xfrm_state *x)
{
@@ -863,13 +925,7 @@ static inline void __xfrm_state_put(struct xfrm_state *x)
static inline void xfrm_state_put(struct xfrm_state *x)
{
if (refcount_dec_and_test(&x->refcnt))
- __xfrm_state_destroy(x, false);
-}
-
-static inline void xfrm_state_put_sync(struct xfrm_state *x)
-{
- if (refcount_dec_and_test(&x->refcnt))
- __xfrm_state_destroy(x, true);
+ __xfrm_state_destroy(x);
}
static inline void xfrm_state_hold(struct xfrm_state *x)
@@ -1707,7 +1763,7 @@ struct xfrmk_spdinfo {
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
bool task_valid);
@@ -1716,8 +1772,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
- struct netlink_ext_ack *extack);
+int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
@@ -1729,6 +1784,15 @@ int xfrm_trans_queue(struct sk_buff *skb,
struct sk_buff *));
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
+int xfrm4_tunnel_check_size(struct sk_buff *skb);
+#if IS_ENABLED(CONFIG_IPV6)
+int xfrm6_tunnel_check_size(struct sk_buff *skb);
+#else
+static inline int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+ return -EMSGSIZE;
+}
+#endif
#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
@@ -1832,12 +1896,16 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
u32 if_id);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m,
- struct xfrm_encap_tmpl *encap);
+ struct xfrm_encap_tmpl *encap,
+ struct net *net,
+ struct xfrm_user_offload *xuo,
+ struct netlink_ext_ack *extack);
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_bundles,
struct xfrm_kmaddress *k, struct net *net,
struct xfrm_encap_tmpl *encap, u32 if_id,
- struct netlink_ext_ack *extack);
+ struct netlink_ext_ack *extack,
+ struct xfrm_user_offload *xuo);
#endif
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index bb03cee716b3..cac56e6b0869 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,7 +29,7 @@ struct xdp_buff_xsk {
dma_addr_t frame_dma;
struct xsk_buff_pool *pool;
struct list_head list_node;
-};
+} __aligned_largest;
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t))
@@ -53,6 +53,8 @@ struct xsk_buff_pool {
refcount_t users;
struct xdp_umem *umem;
struct work_struct work;
+ /* Protects generic receive in shared and non-shared umem mode. */
+ spinlock_t rx_lock;
struct list_head free_list;
struct list_head xskb_list;
u32 heads_cnt;
@@ -141,6 +143,14 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
+
+struct xdp_desc_ctx {
+ dma_addr_t dma;
+ struct xsk_tx_metadata *meta;
+};
+
+struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr);
+
static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
{
return xskb->dma;
@@ -183,7 +193,7 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
!(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
}
-static inline bool xp_mb_desc(struct xdp_desc *desc)
+static inline bool xp_mb_desc(const struct xdp_desc *desc)
{
return desc->options & XDP_PKT_CONTD;
}
@@ -230,8 +240,8 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb,
return orig_addr;
offset = xskb->xdp.data - xskb->xdp.data_hard_start;
- orig_addr -= offset;
offset += pool->headroom;
+ orig_addr -= offset;
return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}