summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/addrconf.h29
-rw-r--r--include/net/af_rxrpc.h2
-rw-r--r--include/net/af_unix.h81
-rw-r--r--include/net/ax25.h1
-rw-r--r--include/net/bluetooth/bluetooth.h1
-rw-r--r--include/net/bluetooth/hci.h44
-rw-r--r--include/net/bluetooth/hci_core.h218
-rw-r--r--include/net/bluetooth/hci_sync.h4
-rw-r--r--include/net/bluetooth/l2cap.h10
-rw-r--r--include/net/bluetooth/mgmt.h1
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/busy_poll.h21
-rw-r--r--include/net/cfg80211.h155
-rw-r--r--include/net/checksum.h2
-rw-r--r--include/net/devlink.h25
-rw-r--r--include/net/dropreason-core.h97
-rw-r--r--include/net/dropreason.h6
-rw-r--r--include/net/dsa.h6
-rw-r--r--include/net/dst.h2
-rw-r--r--include/net/dst_metadata.h7
-rw-r--r--include/net/fib_rules.h29
-rw-r--r--include/net/flow.h1
-rw-r--r--include/net/genetlink.h6
-rw-r--r--include/net/gro.h38
-rw-r--r--include/net/hotdata.h1
-rw-r--r--include/net/inet6_connection_sock.h2
-rw-r--r--include/net/inet6_hashtables.h2
-rw-r--r--include/net/inet_connection_sock.h33
-rw-r--r--include/net/inet_frag.h6
-rw-r--r--include/net/inet_hashtables.h11
-rw-r--r--include/net/inet_sock.h8
-rw-r--r--include/net/inet_timewait_sock.h4
-rw-r--r--include/net/ip.h26
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/ip_tunnels.h12
-rw-r--r--include/net/ipcomp.h13
-rw-r--r--include/net/ipv6.h24
-rw-r--r--include/net/ipv6_frag.h5
-rw-r--r--include/net/iucv/iucv.h30
-rw-r--r--include/net/l3mdev.h27
-rw-r--r--include/net/libeth/rx.h47
-rw-r--r--include/net/lwtunnel.h12
-rw-r--r--include/net/mac80211.h110
-rw-r--r--include/net/macsec.h4
-rw-r--r--include/net/mana/gdma.h18
-rw-r--r--include/net/mana/mana.h4
-rw-r--r--include/net/mctp.h2
-rw-r--r--include/net/mptcp.h19
-rw-r--r--include/net/net_namespace.h5
-rw-r--r--include/net/netdev_lock.h107
-rw-r--r--include/net/netdev_netlink.h12
-rw-r--r--include/net/netdev_queues.h21
-rw-r--r--include/net/netdev_rx_queue.h3
-rw-r--r--include/net/netfilter/nf_conntrack.h18
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h12
-rw-r--r--include/net/netfilter/nf_flow_table.h1
-rw-r--r--include/net/netfilter/nf_tables.h8
-rw-r--r--include/net/netfilter/nf_tproxy.h4
-rw-r--r--include/net/netfilter/nft_fib.h30
-rw-r--r--include/net/netlink.h59
-rw-r--r--include/net/netmem.h99
-rw-r--r--include/net/netns/ipv4.h5
-rw-r--r--include/net/page_pool/helpers.h82
-rw-r--r--include/net/page_pool/memory_provider.h51
-rw-r--r--include/net/page_pool/types.h21
-rw-r--r--include/net/pkt_cls.h4
-rw-r--r--include/net/route.h42
-rw-r--r--include/net/rps.h2
-rw-r--r--include/net/rtnetlink.h40
-rw-r--r--include/net/sch_generic.h23
-rw-r--r--include/net/scm.h12
-rw-r--r--include/net/sctp/checksum.h7
-rw-r--r--include/net/sctp/sctp.h2
-rw-r--r--include/net/sctp/structs.h3
-rw-r--r--include/net/snmp.h5
-rw-r--r--include/net/sock.h86
-rw-r--r--include/net/strparser.h2
-rw-r--r--include/net/tcp.h134
-rw-r--r--include/net/tls.h3
-rw-r--r--include/net/vxlan.h1
-rw-r--r--include/net/xdp.h194
-rw-r--r--include/net/xdp_sock.h13
-rw-r--r--include/net/xdp_sock_drv.h73
-rw-r--r--include/net/xfrm.h77
-rw-r--r--include/net/xsk_buff_pool.h16
85 files changed, 1867 insertions, 619 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 363dd63babe7..9e5e95988b9e 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -88,6 +88,23 @@ struct ifa6_config {
u16 scope;
};
+enum addr_type_t {
+ UNICAST_ADDR,
+ MULTICAST_ADDR,
+ ANYCAST_ADDR,
+};
+
+struct inet6_fill_args {
+ u32 portid;
+ u32 seq;
+ int event;
+ unsigned int flags;
+ int netnsid;
+ int ifindex;
+ enum addr_type_t type;
+ bool force_rt_scope_universe;
+};
+
int addrconf_init(void);
void addrconf_cleanup(void);
@@ -330,6 +347,11 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev)
return rcu_dereference_rtnl(dev->ip6_ptr);
}
+static inline struct inet6_dev *__in6_dev_get_rtnl_net(const struct net_device *dev)
+{
+ return rtnl_net_dereference(dev_net(dev), dev->ip6_ptr);
+}
+
/**
* __in6_dev_stats_get - get inet6_dev pointer for stats
* @dev: network device
@@ -525,4 +547,11 @@ int if6_proc_init(void);
void if6_proc_exit(void);
#endif
+int inet6_fill_ifmcaddr(struct sk_buff *skb,
+ const struct ifmcaddr6 *ifmca,
+ struct inet6_fill_args *args);
+
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
+ struct inet6_fill_args *args);
#endif
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 0754c463224a..cf793d18e5df 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -69,6 +69,8 @@ struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer);
struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call);
const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer);
const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer);
+unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data);
+unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer);
unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 63129c79b8cb..1af1841b7601 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -2,11 +2,15 @@
#ifndef __LINUX_NET_AFUNIX_H
#define __LINUX_NET_AFUNIX_H
-#include <linux/socket.h>
-#include <linux/un.h>
+#include <linux/atomic.h>
#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/path.h>
#include <linux/refcount.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
#include <net/sock.h>
+#include <uapi/linux/un.h>
#if IS_ENABLED(CONFIG_UNIX)
struct unix_sock *unix_get_socket(struct file *filp);
@@ -17,61 +21,17 @@ static inline struct unix_sock *unix_get_socket(struct file *filp)
}
#endif
-extern unsigned int unix_tot_inflight;
-void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver);
-void unix_del_edges(struct scm_fp_list *fpl);
-void unix_update_edges(struct unix_sock *receiver);
-int unix_prepare_fpl(struct scm_fp_list *fpl);
-void unix_destroy_fpl(struct scm_fp_list *fpl);
-void unix_gc(void);
-void wait_for_unix_gc(struct scm_fp_list *fpl);
-
-struct unix_vertex {
- struct list_head edges;
- struct list_head entry;
- struct list_head scc_entry;
- unsigned long out_degree;
- unsigned long index;
- unsigned long scc_index;
-};
-
-struct unix_edge {
- struct unix_sock *predecessor;
- struct unix_sock *successor;
- struct list_head vertex_entry;
- struct list_head stack_entry;
-};
-
-struct sock *unix_peer_get(struct sock *sk);
-
-#define UNIX_HASH_MOD (256 - 1)
-#define UNIX_HASH_SIZE (256 * 2)
-#define UNIX_HASH_BITS 8
-
struct unix_address {
refcount_t refcnt;
int len;
struct sockaddr_un name[];
};
-struct unix_skb_parms {
- struct pid *pid; /* Skb credentials */
- kuid_t uid;
- kgid_t gid;
- struct scm_fp_list *fp; /* Passed files */
-#ifdef CONFIG_SECURITY_NETWORK
- u32 secid; /* Security ID */
-#endif
- u32 consumed;
-} __randomize_layout;
-
struct scm_stat {
atomic_t nr_fds;
unsigned long nr_unix_fds;
};
-#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
-
/* The AF_UNIX socket */
struct unix_sock {
/* WARNING: sk has to be the first member */
@@ -84,6 +44,7 @@ struct unix_sock {
struct unix_vertex *vertex;
spinlock_t lock;
struct socket_wq peer_wq;
+#define peer_wait peer_wq.wait
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
@@ -97,32 +58,4 @@ struct unix_sock {
#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-#define peer_wait peer_wq.wait
-
-long unix_inq_len(struct sock *sk);
-long unix_outq_len(struct sock *sk);
-
-int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
- int flags);
-int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
- int flags);
-#ifdef CONFIG_SYSCTL
-int unix_sysctl_register(struct net *net);
-void unix_sysctl_unregister(struct net *net);
-#else
-static inline int unix_sysctl_register(struct net *net) { return 0; }
-static inline void unix_sysctl_unregister(struct net *net) {}
-#endif
-
-#ifdef CONFIG_BPF_SYSCALL
-extern struct proto unix_dgram_proto;
-extern struct proto unix_stream_proto;
-
-int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
-int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
-void __init unix_bpf_build_proto(void);
-#else
-static inline void __init unix_bpf_build_proto(void)
-{}
-#endif
#endif
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 4ee141aae0a2..a7bba42dde15 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -418,7 +418,6 @@ void ax25_rt_device_down(struct net_device *);
int ax25_rt_ioctl(unsigned int, void __user *);
extern const struct seq_operations ax25_rt_seqops;
ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev);
-int ax25_rt_autobind(ax25_cb *, ax25_address *);
struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *,
ax25_address *, ax25_digi *);
void ax25_rt_free(void);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 435250c72d56..bbefde319f95 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -156,6 +156,7 @@ struct bt_voice {
#define BT_PKT_STATUS 16
#define BT_SCM_PKT_STATUS 0x03
+#define BT_SCM_ERROR 0x04
#define BT_ISO_QOS 17
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 6203bd8663b7..521a9d0acac6 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -208,6 +208,13 @@ enum {
*/
HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+ /* When this quirk is set consider Sync Flow Control as supported by
+ * the driver.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED,
+
/* When this quirk is set, the LE states reported through the
* HCI_LE_READ_SUPPORTED_STATES are invalid/broken.
*
@@ -354,6 +361,22 @@ enum {
* during the hdev->setup vendor callback.
*/
HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
+
+ /* When this quirk is set, the HCI_OP_READ_VOICE_SETTING command is
+ * skipped. This is required for a subset of the CSR controller clones
+ * which erroneously claim to support it.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_BROKEN_READ_VOICE_SETTING,
+
+ /* When this quirk is set, the HCI_OP_READ_PAGE_SCAN_TYPE command is
+ * skipped. This is required for a subset of the CSR controller clones
+ * which erroneously claim to support it.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE,
};
/* HCI device flags */
@@ -432,13 +455,13 @@ enum {
HCI_WIDEBAND_SPEECH_ENABLED,
HCI_EVENT_FILTER_CONFIGURED,
HCI_PA_SYNC,
+ HCI_SCO_FLOWCTL,
HCI_DUT_MODE,
HCI_VENDOR_DIAG,
HCI_FORCE_BREDR_SMP,
HCI_FORCE_STATIC_ADDR,
HCI_LL_RPA_RESOLUTION,
- HCI_ENABLE_LL_PRIVACY,
HCI_CMD_PENDING,
HCI_FORCE_NO_MITM,
HCI_QUALITY_REPORT,
@@ -534,7 +557,8 @@ enum {
#define ESCO_LINK 0x02
/* Low Energy links do not have defined link type. Use invented one */
#define LE_LINK 0x80
-#define ISO_LINK 0x82
+#define CIS_LINK 0x82
+#define BIS_LINK 0x83
#define INVALID_LINK 0xff
/* LMP features */
@@ -684,7 +708,7 @@ enum {
#define HCI_ERROR_REMOTE_POWER_OFF 0x15
#define HCI_ERROR_LOCAL_HOST_TERM 0x16
#define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18
-#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e
+#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1a
#define HCI_ERROR_INVALID_LL_PARAMS 0x1e
#define HCI_ERROR_UNSPECIFIED 0x1f
#define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c
@@ -856,6 +880,11 @@ struct hci_cp_remote_name_req_cancel {
bdaddr_t bdaddr;
} __packed;
+struct hci_rp_remote_name_req_cancel {
+ __u8 status;
+ bdaddr_t bdaddr;
+} __packed;
+
#define HCI_OP_READ_REMOTE_FEATURES 0x041b
struct hci_cp_read_remote_features {
__le16 handle;
@@ -1529,6 +1558,11 @@ struct hci_rp_read_tx_power {
__s8 tx_power;
} __packed;
+#define HCI_OP_WRITE_SYNC_FLOWCTL 0x0c2f
+struct hci_cp_write_sync_flowctl {
+ __u8 enable;
+} __packed;
+
#define HCI_OP_READ_PAGE_SCAN_TYPE 0x0c46
struct hci_rp_read_page_scan_type {
__u8 status;
@@ -1898,6 +1932,8 @@ struct hci_cp_le_pa_create_sync {
__u8 sync_cte_type;
} __packed;
+#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045
+
#define HCI_OP_LE_PA_TERM_SYNC 0x2046
struct hci_cp_le_pa_term_sync {
__le16 handle;
@@ -2797,7 +2833,7 @@ struct hci_evt_le_create_big_complete {
__le16 bis_handle[];
} __packed;
-#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d
+#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d
struct hci_evt_le_big_sync_estabilished {
__u8 status;
__u8 handle;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ca22ead85dbe..d15316bffd70 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -157,8 +157,9 @@ struct bdaddr_list_with_irk {
/* Bitmask of connection flags */
enum hci_conn_flags {
- HCI_CONN_FLAG_REMOTE_WAKEUP = 1,
- HCI_CONN_FLAG_DEVICE_PRIVACY = 2,
+ HCI_CONN_FLAG_REMOTE_WAKEUP = BIT(0),
+ HCI_CONN_FLAG_DEVICE_PRIVACY = BIT(1),
+ HCI_CONN_FLAG_ADDRESS_RESOLUTION = BIT(2),
};
typedef u8 hci_conn_flags_t;
@@ -260,6 +261,12 @@ struct adv_info {
struct delayed_work rpa_expired_cb;
};
+struct tx_queue {
+ struct sk_buff_head queue;
+ unsigned int extra;
+ unsigned int tracked;
+};
+
#define HCI_MAX_ADV_INSTANCES 5
#define HCI_DEFAULT_ADV_DURATION 2
@@ -538,6 +545,7 @@ struct hci_dev {
struct hci_conn_hash conn_hash;
struct list_head mesh_pending;
+ struct mutex mgmt_pending_lock;
struct list_head mgmt_pending;
struct list_head reject_list;
struct list_head accept_list;
@@ -632,7 +640,6 @@ struct hci_dev {
int (*post_init)(struct hci_dev *hdev);
int (*set_diag)(struct hci_dev *hdev, bool enable);
int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr);
- void (*cmd_timeout)(struct hci_dev *hdev);
void (*reset)(struct hci_dev *hdev);
bool (*wakeup)(struct hci_dev *hdev);
int (*set_quality_report)(struct hci_dev *hdev, bool enable);
@@ -733,6 +740,8 @@ struct hci_conn {
struct sk_buff_head data_q;
struct list_head chan_list;
+ struct tx_queue tx_q;
+
struct delayed_work disc_work;
struct delayed_work auto_accept_work;
struct delayed_work idle_work;
@@ -804,6 +813,7 @@ struct hci_conn_params {
extern struct list_head hci_dev_list;
extern struct list_head hci_cb_list;
extern rwlock_t hci_dev_list_lock;
+extern struct mutex hci_cb_list_lock;
#define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags)
#define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags)
@@ -987,7 +997,8 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c)
case ESCO_LINK:
h->sco_num++;
break;
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
h->iso_num++;
break;
}
@@ -1013,7 +1024,8 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c)
case ESCO_LINK:
h->sco_num--;
break;
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
h->iso_num--;
break;
}
@@ -1030,7 +1042,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type)
case SCO_LINK:
case ESCO_LINK:
return h->sco_num;
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
return h->iso_num;
default:
return 0;
@@ -1091,7 +1104,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (bacmp(&c->dst, ba) || c->type != ISO_LINK)
+ if (bacmp(&c->dst, ba) || c->type != BIS_LINK)
continue;
if (c->iso_qos.bcast.bis == bis) {
@@ -1104,10 +1117,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
return NULL;
}
-static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev,
- __u8 sid,
- bdaddr_t *dst,
- __u8 dst_type)
+static inline struct hci_conn *
+hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
@@ -1115,8 +1126,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK || bacmp(&c->dst, dst) ||
- c->dst_type != dst_type || c->sid != sid)
+ if (c->type != BIS_LINK)
+ continue;
+
+ if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags))
continue;
rcu_read_unlock();
@@ -1139,8 +1152,8 @@ hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (bacmp(&c->dst, ba) || c->type != ISO_LINK ||
- !test_bit(HCI_CONN_PER_ADV, &c->flags))
+ if (bacmp(&c->dst, ba) || c->type != BIS_LINK ||
+ !test_bit(HCI_CONN_PER_ADV, &c->flags))
continue;
if (c->iso_qos.bcast.big == big &&
@@ -1229,7 +1242,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
+ if (c->type != CIS_LINK)
continue;
/* Match CIG ID if set */
@@ -1261,7 +1274,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
+ if (c->type != CIS_LINK)
continue;
if (handle == c->iso_qos.ucast.cig) {
@@ -1284,17 +1297,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
- continue;
-
- /* An ISO_LINK hcon with BDADDR_ANY as destination
- * address is a Broadcast connection. A Broadcast
- * slave connection is associated with a PA train,
- * so the sync_handle can be used to differentiate
- * from unicast.
- */
- if (bacmp(&c->dst, BDADDR_ANY) &&
- c->sync_handle == HCI_SYNC_HANDLE_INVALID)
+ if (c->type != BIS_LINK)
continue;
if (handle == c->iso_qos.bcast.big) {
@@ -1318,7 +1321,7 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != BIS_LINK)
continue;
if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) {
@@ -1341,8 +1344,8 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK ||
- c->state != state)
+ if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) ||
+ c->state != state)
continue;
if (handle == c->iso_qos.bcast.big) {
@@ -1365,8 +1368,8 @@ hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK ||
- !test_bit(HCI_CONN_PA_SYNC, &c->flags))
+ if (c->type != BIS_LINK ||
+ !test_bit(HCI_CONN_PA_SYNC, &c->flags))
continue;
if (c->iso_qos.bcast.big == big) {
@@ -1388,7 +1391,7 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle)
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != BIS_LINK)
continue;
/* Ignore the listen hcon, we are looking
@@ -1515,8 +1518,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
void hci_sco_setup(struct hci_conn *conn, __u8 status);
bool hci_iso_setup_path(struct hci_conn *conn);
int hci_le_create_cis_pending(struct hci_dev *hdev);
-int hci_pa_create_sync_pending(struct hci_dev *hdev);
-int hci_le_big_create_sync_pending(struct hci_dev *hdev);
int hci_conn_check_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
@@ -1557,9 +1558,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 data_len, __u8 *data);
struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid, struct bt_iso_qos *qos);
-int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
- struct bt_iso_qos *qos,
- __u16 sync_handle, __u8 num_bis, __u8 bis[]);
+int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos, __u16 sync_handle,
+ __u8 num_bis, __u8 bis[]);
int hci_conn_check_link_mode(struct hci_conn *conn);
int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level);
int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
@@ -1571,6 +1572,18 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
void hci_conn_failed(struct hci_conn *conn, u8 status);
u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle);
+void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb);
+void hci_conn_tx_dequeue(struct hci_conn *conn);
+void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset,
+ const struct sockcm_cookie *sockc);
+
+static inline void hci_sockcm_init(struct sockcm_cookie *sockc, struct sock *sk)
+{
+ *sockc = (struct sockcm_cookie) {
+ .tsflags = READ_ONCE(sk->sk_tsflags),
+ };
+}
+
/*
* hci_conn_get() and hci_conn_put() are used to control the life-time of an
* "hci_conn" object. They do not guarantee that the hci_conn object is running,
@@ -1759,8 +1772,6 @@ int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type);
int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
u8 type);
-int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr,
- u8 type);
void hci_bdaddr_list_clear(struct list_head *list);
struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
@@ -1781,6 +1792,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
void hci_uuids_clear(struct hci_dev *hdev);
void hci_link_keys_clear(struct hci_dev *hdev);
+u8 *hci_conn_key_enc_size(struct hci_conn *conn);
struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr);
struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn,
bdaddr_t *bdaddr, u8 *val, u8 type,
@@ -1859,6 +1871,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define lmp_hold_capable(dev) ((dev)->features[0][0] & LMP_HOLD)
#define lmp_sniff_capable(dev) ((dev)->features[0][0] & LMP_SNIFF)
#define lmp_park_capable(dev) ((dev)->features[0][1] & LMP_PARK)
+#define lmp_sco_capable(dev) ((dev)->features[0][1] & LMP_SCO)
#define lmp_inq_rssi_capable(dev) ((dev)->features[0][3] & LMP_RSSI_INQ)
#define lmp_esco_capable(dev) ((dev)->features[0][3] & LMP_ESCO)
#define lmp_bredr_capable(dev) (!((dev)->features[0][4] & LMP_NO_BREDR))
@@ -1919,17 +1932,17 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY)
-/* Use LL Privacy based address resolution if supported */
-#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \
- hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY))
-
-#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \
+#define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \
(hdev->commands[39] & 0x04))
#define read_key_size_capable(dev) \
((dev)->commands[20] & 0x10 && \
!test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks))
+#define read_voice_setting_capable(dev) \
+ ((dev)->commands[9] & 0x04 && \
+ !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks))
+
/* Use enhanced synchronous connection if command is supported and its quirk
* has not been set.
*/
@@ -1993,7 +2006,8 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
case ESCO_LINK:
return sco_connect_ind(hdev, bdaddr, flags);
- case ISO_LINK:
+ case CIS_LINK:
+ case BIS_LINK:
return iso_connect_ind(hdev, bdaddr, flags);
default:
@@ -2016,47 +2030,24 @@ struct hci_cb {
char *name;
- bool (*match) (struct hci_conn *conn);
void (*connect_cfm) (struct hci_conn *conn, __u8 status);
void (*disconn_cfm) (struct hci_conn *conn, __u8 status);
void (*security_cfm) (struct hci_conn *conn, __u8 status,
- __u8 encrypt);
+ __u8 encrypt);
void (*key_change_cfm) (struct hci_conn *conn, __u8 status);
void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role);
};
-static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list)
-{
- struct hci_cb *cb, *cpy;
-
- rcu_read_lock();
- list_for_each_entry_rcu(cb, &hci_cb_list, list) {
- if (cb->match && cb->match(conn)) {
- cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC);
- if (!cpy)
- break;
-
- *cpy = *cb;
- INIT_LIST_HEAD(&cpy->list);
- list_add_rcu(&cpy->list, list);
- }
- }
- rcu_read_unlock();
-}
-
static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->connect_cfm)
cb->connect_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->connect_cfm_cb)
conn->connect_cfm_cb(conn, status);
@@ -2064,43 +2055,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->disconn_cfm)
cb->disconn_cfm(conn, reason);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->disconn_cfm_cb)
conn->disconn_cfm_cb(conn, reason);
}
-static inline void hci_security_cfm(struct hci_conn *conn, __u8 status,
- __u8 encrypt)
-{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
- if (cb->security_cfm)
- cb->security_cfm(conn, status, encrypt);
- kfree(cb);
- }
-
- if (conn->security_cfm_cb)
- conn->security_cfm_cb(conn, status);
-}
-
static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
@@ -2108,11 +2078,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00;
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (conn->state == BT_CONFIG) {
@@ -2139,38 +2118,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
conn->sec_level = conn->pending_sec_level;
}
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
+ struct hci_cb *cb;
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->key_change_cfm)
cb->key_change_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
__u8 role)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->role_switch_cfm)
cb->role_switch_cfm(conn, status, role);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type)
@@ -2392,8 +2373,6 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status);
void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
u8 status);
void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status);
-void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status);
-void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status);
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len,
@@ -2419,7 +2398,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
u8 instance);
void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
u8 instance);
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle);
int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
bdaddr_t *bdaddr, u8 addr_type);
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index f3052cb252ef..72558c826aa1 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -140,7 +140,6 @@ int hci_update_scan(struct hci_dev *hdev);
int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul);
int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk);
-int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance);
struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext,
struct sock *sk);
@@ -186,3 +185,6 @@ int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn);
int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn);
int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn,
struct hci_conn_params *params);
+
+int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn);
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d9c767cf773d..4bb0eaedda18 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -38,8 +38,8 @@
#define L2CAP_DEFAULT_TX_WINDOW 63
#define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF
#define L2CAP_DEFAULT_MAX_TX 3
-#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */
-#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */
+#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */
+#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */
#define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */
#define L2CAP_DEFAULT_ACK_TO 200
#define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF
@@ -668,7 +668,7 @@ struct l2cap_conn {
struct l2cap_chan *smp;
struct list_head chan_l;
- struct mutex chan_lock;
+ struct mutex lock;
struct kref ref;
struct list_head users;
};
@@ -955,7 +955,8 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason);
int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
bdaddr_t *dst, u8 dst_type, u16 timeout);
int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu);
-int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len);
+int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
+ const struct sockcm_cookie *sockc);
void l2cap_chan_busy(struct l2cap_chan *chan, int busy);
void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail);
int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator);
@@ -970,6 +971,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err);
void l2cap_send_conn_req(struct l2cap_chan *chan);
struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn);
+struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *conn);
void l2cap_conn_put(struct l2cap_conn *conn);
int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index affac861efdc..3575cd16049a 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -113,6 +113,7 @@ struct mgmt_rp_read_index_list {
#define MGMT_SETTING_CIS_PERIPHERAL BIT(19)
#define MGMT_SETTING_ISO_BROADCASTER BIT(20)
#define MGMT_SETTING_ISO_SYNC_RECEIVER BIT(21)
+#define MGMT_SETTING_LL_PRIVACY BIT(22)
#define MGMT_OP_READ_INFO 0x0004
#define MGMT_READ_INFO_SIZE 0
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 8bb5f016969f..95f67b308c19 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -695,6 +695,7 @@ void bond_debug_register(struct bonding *bond);
void bond_debug_unregister(struct bonding *bond);
void bond_debug_reregister(struct bonding *bond);
const char *bond_mode_name(int mode);
+bool bond_xdp_check(struct bonding *bond, int mode);
void bond_setup(struct net_device *bond_dev);
unsigned int bond_get_num_tx_queues(void);
int bond_netlink_init(void);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c39a426ebf52..6e172d0f6ef5 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -24,6 +24,11 @@
*/
#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
+static inline bool napi_id_valid(unsigned int napi_id)
+{
+ return napi_id >= MIN_NAPI_ID;
+}
+
#define BUSY_POLL_BUDGET 8
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -114,7 +119,7 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_id = READ_ONCE(sk->sk_napi_id);
- if (napi_id >= MIN_NAPI_ID)
+ if (napi_id_valid(napi_id))
napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk,
READ_ONCE(sk->sk_prefer_busy_poll),
READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET);
@@ -122,18 +127,24 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
}
/* used in the NIC receive handler to mark the skb */
-static inline void skb_mark_napi_id(struct sk_buff *skb,
- struct napi_struct *napi)
+static inline void __skb_mark_napi_id(struct sk_buff *skb,
+ const struct gro_node *gro)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
/* If the skb was already marked with a valid NAPI ID, avoid overwriting
* it.
*/
- if (skb->napi_id < MIN_NAPI_ID)
- skb->napi_id = napi->napi_id;
+ if (!napi_id_valid(skb->napi_id))
+ skb->napi_id = gro->cached_napi_id;
#endif
}
+static inline void skb_mark_napi_id(struct sk_buff *skb,
+ const struct napi_struct *napi)
+{
+ __skb_mark_napi_id(skb, &napi->gro);
+}
+
/* used in the protocol handler to propagate the napi_id to the socket */
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 27acf1292a5c..efbd79c67be2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <linux/ethtool.h>
@@ -127,6 +127,8 @@ struct wiphy;
* even if it is otherwise disabled.
* @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation
* with very low power (VLP), even if otherwise set to NO_IR.
+ * @IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY: Allow activity on a 20 MHz channel,
+ * even if otherwise set to NO_IR.
*/
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = BIT(0),
@@ -155,6 +157,7 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23),
IEEE80211_CHAN_CAN_MONITOR = BIT(24),
IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25),
+ IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY = BIT(26),
};
#define IEEE80211_CHAN_NO_HT40 \
@@ -1006,6 +1009,17 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
/**
+ * cfg80211_chandef_get_width - return chandef width in MHz
+ * @c: chandef to return bandwidth for
+ * Return: channel width in MHz for the given chandef; note that it returns
+ * 80 for 80+80 configurations
+ */
+static inline int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
+{
+ return nl80211_chan_width_to_mhz(c->width);
+}
+
+/**
* cfg80211_chandef_valid - check if a channel definition is valid
* @chandef: the channel definition to check
* Return: %true if the channel definition is valid. %false otherwise.
@@ -2045,9 +2059,6 @@ struct cfg80211_tid_stats {
* @assoc_at: bootime (ns) of the last association
* @rx_bytes: bytes (size of MPDUs) received from this station
* @tx_bytes: bytes (size of MPDUs) transmitted to this station
- * @llid: mesh local link id
- * @plid: mesh peer link id
- * @plink_state: mesh peer link state
* @signal: The signal strength, type depends on the wiphy's signal_type.
* For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_.
* @signal_avg: Average signal strength, type depends on the wiphy's signal_type.
@@ -2067,14 +2078,20 @@ struct cfg80211_tid_stats {
* This number should increase every time the list of stations
* changes, i.e. when a station is added or removed, so that
* userspace can tell whether it got a consistent snapshot.
+ * @beacon_loss_count: Number of times beacon loss event has triggered.
* @assoc_req_ies: IEs from (Re)Association Request.
* This is used only when in AP mode with drivers that do not use
* user space MLME/SME implementation. The information is provided for
* the cfg80211_new_sta() calls to notify user space of the IEs.
* @assoc_req_ies_len: Length of assoc_req_ies buffer in octets.
* @sta_flags: station flags mask & values
- * @beacon_loss_count: Number of times beacon loss event has triggered.
* @t_offset: Time offset of the station relative to this host.
+ * @llid: mesh local link id
+ * @plid: mesh peer link id
+ * @plink_state: mesh peer link state
+ * @connected_to_gate: true if mesh STA has a path to mesh gate
+ * @connected_to_as: true if mesh STA has a path to authentication server
+ * @airtime_link_metric: mesh airtime link metric.
* @local_pm: local mesh STA power save mode
* @peer_pm: peer mesh STA power save mode
* @nonpeer_pm: non-peer mesh STA power save mode
@@ -2083,7 +2100,6 @@ struct cfg80211_tid_stats {
* @rx_beacon: number of beacons received from this peer
* @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received
* from this peer
- * @connected_to_gate: true if mesh STA has a path to mesh gate
* @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
* @tx_duration: aggregate PPDU duration(usecs) for all the frames to a peer
* @airtime_weight: current airtime scheduling weight
@@ -2097,8 +2113,6 @@ struct cfg80211_tid_stats {
* @fcs_err_count: number of packets (MPDUs) received from this station with
* an FCS error. This counter should be incremented only when TA of the
* received packet with an FCS error matches the peer MAC address.
- * @airtime_link_metric: mesh airtime link metric.
- * @connected_to_as: true if mesh STA has a path to authentication server
* @mlo_params_valid: Indicates @assoc_link_id and @mld_addr fields are filled
* by driver. Drivers use this only in cfg80211_new_sta() calls when AP
* MLD's MLME/SME is offload to driver. Drivers won't fill this
@@ -2125,9 +2139,6 @@ struct station_info {
u64 assoc_at;
u64 rx_bytes;
u64 tx_bytes;
- u16 llid;
- u16 plid;
- u8 plink_state;
s8 signal;
s8 signal_avg;
@@ -2147,36 +2158,38 @@ struct station_info {
int generation;
+ u32 beacon_loss_count;
+
const u8 *assoc_req_ies;
size_t assoc_req_ies_len;
- u32 beacon_loss_count;
s64 t_offset;
+ u16 llid;
+ u16 plid;
+ u8 plink_state;
+ u8 connected_to_gate;
+ u8 connected_to_as;
+ u32 airtime_link_metric;
enum nl80211_mesh_power_mode local_pm;
enum nl80211_mesh_power_mode peer_pm;
enum nl80211_mesh_power_mode nonpeer_pm;
u32 expected_throughput;
- u64 tx_duration;
- u64 rx_duration;
- u64 rx_beacon;
- u8 rx_beacon_signal_avg;
- u8 connected_to_gate;
+ u16 airtime_weight;
- struct cfg80211_tid_stats *pertid;
s8 ack_signal;
s8 avg_ack_signal;
+ struct cfg80211_tid_stats *pertid;
- u16 airtime_weight;
+ u64 tx_duration;
+ u64 rx_duration;
+ u64 rx_beacon;
+ u8 rx_beacon_signal_avg;
u32 rx_mpdu_count;
u32 fcs_err_count;
- u32 airtime_link_metric;
-
- u8 connected_to_as;
-
bool mlo_params_valid;
u8 assoc_link_id;
u8 mld_addr[ETH_ALEN] __aligned(2);
@@ -2265,7 +2278,7 @@ static inline int cfg80211_get_station(struct net_device *dev,
* @MONITOR_FLAG_PLCPFAIL: pass frames with bad PLCP
* @MONITOR_FLAG_CONTROL: pass control frames
* @MONITOR_FLAG_OTHER_BSS: disable BSSID filtering
- * @MONITOR_FLAG_COOK_FRAMES: report frames after processing
+ * @MONITOR_FLAG_COOK_FRAMES: deprecated, will unconditionally be refused
* @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address
* @MONITOR_FLAG_SKIP_TX: do not pass locally transmitted frames
*/
@@ -2947,6 +2960,7 @@ struct cfg80211_bss_ies {
* @nontrans_list: list of non-transmitted BSS, if this is a transmitted one
* (multi-BSSID support)
* @signal: signal strength value (type depends on the wiphy's signal_type)
+ * @ts_boottime: timestamp of the last BSS update in nanoseconds since boot
* @chains: bitmask for filled values in @chain_signal.
* @chain_signal: per-chain signal strength of last received BSS in dBm.
* @bssid_index: index in the multiple BSS set
@@ -2971,6 +2985,8 @@ struct cfg80211_bss {
s32 signal;
+ u64 ts_boottime;
+
u16 beacon_interval;
u16 capability;
@@ -3023,6 +3039,10 @@ static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id)
*
* @bss: The BSS to authenticate with, the callee must obtain a reference
* to it if it needs to keep it.
+ * @supported_selectors: List of selectors that should be assumed to be
+ * supported by the station.
+ * SAE_H2E must be assumed supported if set to %NULL.
+ * @supported_selectors_len: Length of supported_selectors in octets.
* @auth_type: Authentication type (algorithm)
* @ie: Extra IEs to add to Authentication frame or %NULL
* @ie_len: Length of ie buffer in octets
@@ -3045,6 +3065,8 @@ struct cfg80211_auth_request {
struct cfg80211_bss *bss;
const u8 *ie;
size_t ie_len;
+ const u8 *supported_selectors;
+ u8 supported_selectors_len;
enum nl80211_auth_type auth_type;
const u8 *key;
u8 key_len;
@@ -3075,6 +3097,19 @@ struct cfg80211_assoc_link {
};
/**
+ * struct cfg80211_ml_reconf_req - MLO link reconfiguration request
+ * @add_links: data for links to add, see &struct cfg80211_assoc_link
+ * @rem_links: bitmap of links to remove
+ * @ext_mld_capa_ops: extended MLD capabilities and operations set by
+ * userspace for the ML reconfiguration action frame
+ */
+struct cfg80211_ml_reconf_req {
+ struct cfg80211_assoc_link add_links[IEEE80211_MLD_MAX_NUM_LINKS];
+ u16 rem_links;
+ u16 ext_mld_capa_ops;
+};
+
+/**
* enum cfg80211_assoc_req_flags - Over-ride default behaviour in association.
*
* @ASSOC_REQ_DISABLE_HT: Disable HT (802.11n)
@@ -3124,6 +3159,10 @@ enum cfg80211_assoc_req_flags {
* included in the Current AP address field of the Reassociation Request
* frame.
* @flags: See &enum cfg80211_assoc_req_flags
+ * @supported_selectors: supported BSS selectors in IEEE 802.11 format
+ * (or %NULL for no change).
+ * If %NULL, then support for SAE_H2E should be assumed.
+ * @supported_selectors_len: number of supported BSS selectors
* @ht_capa: HT Capabilities over-rides. Values set in ht_capa_mask
* will be used in ht_capa. Un-supported values will be ignored.
* @ht_capa_mask: The bits of ht_capa which are to be used.
@@ -3142,6 +3181,8 @@ enum cfg80211_assoc_req_flags {
* the link on which the association request should be sent
* @ap_mld_addr: AP MLD address in case of MLO association request,
* valid iff @link_id >= 0
+ * @ext_mld_capa_ops: extended MLD capabilities and operations set by
+ * userspace for the association
*/
struct cfg80211_assoc_request {
struct cfg80211_bss *bss;
@@ -3150,6 +3191,8 @@ struct cfg80211_assoc_request {
struct cfg80211_crypto_settings crypto;
bool use_mfp;
u32 flags;
+ const u8 *supported_selectors;
+ u8 supported_selectors_len;
struct ieee80211_ht_cap ht_capa;
struct ieee80211_ht_cap ht_capa_mask;
struct ieee80211_vht_cap vht_capa, vht_capa_mask;
@@ -3160,6 +3203,7 @@ struct cfg80211_assoc_request {
struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS];
const u8 *ap_mld_addr;
s8 link_id;
+ u16 ext_mld_capa_ops;
};
/**
@@ -4582,8 +4626,18 @@ struct mgmt_frame_regs {
*
* @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
* @set_ttlm: set the TID to link mapping.
+ * @set_epcs: Enable/Disable EPCS for station mode.
* @get_radio_mask: get bitmask of radios in use.
* (invoked with the wiphy mutex held)
+ * @assoc_ml_reconf: Request a non-AP MLO connection to perform ML
+ * reconfiguration, i.e., add and/or remove links to/from the
+ * association using ML reconfiguration action frames. Successfully added
+ * links will be added to the set of valid links. Successfully removed
+ * links will be removed from the set of valid links. The driver must
+ * indicate removed links by calling cfg80211_links_removed() and added
+ * links by calling cfg80211_mlo_reconf_add_done(). When calling
+ * cfg80211_mlo_reconf_add_done() the bss pointer must be given for each
+ * link for which MLO reconfiguration 'add' operation was requested.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4733,7 +4787,7 @@ struct cfg80211_ops {
int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev,
enum nl80211_tx_power_setting type, int mbm);
int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev,
- int *dbm);
+ unsigned int link_id, int *dbm);
void (*rfkill_poll)(struct wiphy *wiphy);
@@ -4947,6 +5001,10 @@ struct cfg80211_ops {
int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ttlm_params *params);
u32 (*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev);
+ int (*assoc_ml_reconf)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_ml_reconf_req *req);
+ int (*set_epcs)(struct wiphy *wiphy, struct net_device *dev,
+ bool val);
};
/*
@@ -5957,7 +6015,7 @@ int wiphy_register(struct wiphy *wiphy);
* @wiphy: the wiphy to check the locking on
* @p: The pointer to read, prior to dereferencing
*
- * Return the value of the specified RCU-protected pointer, but omit the
+ * Return: the value of the specified RCU-protected pointer, but omit the
* READ_ONCE(), because caller holds the wiphy mutex used for updates.
*/
#define wiphy_dereference(wiphy, p) \
@@ -6031,6 +6089,10 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
mutex_unlock(&wiphy->mtx);
}
+DEFINE_GUARD(wiphy, struct wiphy *,
+ mutex_lock(&_T->mtx),
+ mutex_unlock(&_T->mtx))
+
struct wiphy_work;
typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *);
@@ -9701,6 +9763,40 @@ static inline int cfg80211_color_change_notify(struct net_device *dev,
void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
/**
+ * struct cfg80211_mlo_reconf_done_data - MLO reconfiguration data
+ * @buf: MLO Reconfiguration Response frame (header + body)
+ * @len: length of the frame data
+ * @added_links: BIT mask of links successfully added to the association
+ * @links: per-link information indexed by link ID
+ * @links.bss: the BSS that MLO reconfiguration was requested for, ownership of
+ * the pointer moves to cfg80211 in the call to
+ * cfg80211_mlo_reconf_add_done().
+ *
+ * The BSS pointer must be set for each link for which 'add' operation was
+ * requested in the assoc_ml_reconf callback.
+ */
+struct cfg80211_mlo_reconf_done_data {
+ const u8 *buf;
+ size_t len;
+ u16 added_links;
+ struct {
+ struct cfg80211_bss *bss;
+ u8 *addr;
+ } links[IEEE80211_MLD_MAX_NUM_LINKS];
+};
+
+/**
+ * cfg80211_mlo_reconf_add_done - Notify about MLO reconfiguration result
+ * @dev: network device.
+ * @data: MLO reconfiguration done data, &struct cfg80211_mlo_reconf_done_data
+ *
+ * Inform cfg80211 and the userspace that processing of ML reconfiguration
+ * request to add links to the association is done.
+ */
+void cfg80211_mlo_reconf_add_done(struct net_device *dev,
+ struct cfg80211_mlo_reconf_done_data *data);
+
+/**
* cfg80211_schedule_channels_check - schedule regulatory check if needed
* @wdev: the wireless device to check
*
@@ -9710,6 +9806,13 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
*/
void cfg80211_schedule_channels_check(struct wireless_dev *wdev);
+/**
+ * cfg80211_epcs_changed - Notify about a change in EPCS state
+ * @netdev: the wireless device whose EPCS state changed
+ * @enabled: set to true if EPCS was enabled, otherwise set to false.
+ */
+void cfg80211_epcs_changed(struct net_device *netdev, bool enabled);
+
#ifdef CONFIG_CFG80211_DEBUGFS
/**
* wiphy_locked_debugfs_read - do a locked read in debugfs
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 243f972267b8..be9356d4b67a 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -164,7 +164,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr);
+ __wsum diff, bool pseudohdr, bool ipv6);
static __always_inline
void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index fbb9a2668e24..b8783126c1ed 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -35,7 +35,7 @@ struct devlink_port_phys_attrs {
/**
* struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes
* @controller: Associated controller number
- * @pf: Associated PCI PF number for this port.
+ * @pf: associated PCI function number for the devlink port instance
* @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_pf_attrs {
@@ -47,8 +47,9 @@ struct devlink_port_pci_pf_attrs {
/**
* struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes
* @controller: Associated controller number
- * @pf: Associated PCI PF number for this port.
- * @vf: Associated PCI VF for of the PCI PF for this port.
+ * @pf: associated PCI function number for the devlink port instance
+ * @vf: associated PCI VF number of a PF for the devlink port instance;
+ * VF number starts from 0 for the first PCI virtual function
* @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_vf_attrs {
@@ -61,8 +62,8 @@ struct devlink_port_pci_vf_attrs {
/**
* struct devlink_port_pci_sf_attrs - devlink port's PCI SF attributes
* @controller: Associated controller number
- * @sf: Associated PCI SF for of the PCI PF for this port.
- * @pf: Associated PCI PF number for this port.
+ * @sf: associated SF number of a PF for the devlink port instance
+ * @pf: associated PCI function number for the devlink port instance
* @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_sf_attrs {
@@ -1261,6 +1262,18 @@ enum devlink_trap_group_generic_id {
.min_burst = _min_burst, \
}
+#define devlink_fmsg_put(fmsg, name, value) ( \
+ _Generic((value), \
+ bool : devlink_fmsg_bool_pair_put, \
+ u8 : devlink_fmsg_u8_pair_put, \
+ u16 : devlink_fmsg_u32_pair_put, \
+ u32 : devlink_fmsg_u32_pair_put, \
+ u64 : devlink_fmsg_u64_pair_put, \
+ int : devlink_fmsg_u32_pair_put, \
+ char * : devlink_fmsg_string_pair_put, \
+ const char * : devlink_fmsg_string_pair_put) \
+ (fmsg, name, (value)))
+
enum {
/* device supports reload operations */
DEVLINK_F_RELOAD = 1UL << 0,
@@ -1522,6 +1535,7 @@ int devl_trylock(struct devlink *devlink);
void devl_unlock(struct devlink *devlink);
void devl_assert_locked(struct devlink *devlink);
bool devl_lock_is_held(struct devlink *devlink);
+DEFINE_GUARD(devl, struct devlink *, devl_lock(_T), devl_unlock(_T));
struct ib_device;
@@ -1994,6 +2008,7 @@ int devlink_compat_switch_id_get(struct net_device *dev,
int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port);
size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port);
+void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb);
#else
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 6c5a1ea209a2..e4fdc6b54cef 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -6,9 +6,13 @@
#define DEFINE_DROP_REASON(FN, FNe) \
FN(NOT_SPECIFIED) \
FN(NO_SOCKET) \
+ FN(SOCKET_CLOSE) \
+ FN(SOCKET_FILTER) \
+ FN(SOCKET_RCVBUFF) \
+ FN(UNIX_DISCONNECT) \
+ FN(UNIX_SKIP_OOB) \
FN(PKT_TOO_SMALL) \
FN(TCP_CSUM) \
- FN(SOCKET_FILTER) \
FN(UDP_CSUM) \
FN(NETFILTER_DROP) \
FN(OTHERHOST) \
@@ -18,7 +22,6 @@
FN(UNICAST_IN_L2_MULTICAST) \
FN(XFRM_POLICY) \
FN(IP_NOPROTO) \
- FN(SOCKET_RCVBUFF) \
FN(PROTO_MEM) \
FN(TCP_AUTH_HDR) \
FN(TCP_MD5NOTFOUND) \
@@ -36,6 +39,9 @@
FN(TCP_OVERWINDOW) \
FN(TCP_OFOMERGE) \
FN(TCP_RFC7323_PAWS) \
+ FN(TCP_RFC7323_PAWS_ACK) \
+ FN(TCP_RFC7323_TSECR) \
+ FN(TCP_LISTEN_OVERFLOW) \
FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
FN(TCP_INVALID_ACK_SEQUENCE) \
@@ -58,6 +64,12 @@
FN(TC_EGRESS) \
FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
+ FN(QDISC_OVERLIMIT) \
+ FN(QDISC_CONGESTED) \
+ FN(CAKE_FLOOD) \
+ FN(FQ_BAND_LIMIT) \
+ FN(FQ_HORIZON_LIMIT) \
+ FN(FQ_FLOW_LIMIT) \
FN(CPU_BACKLOG) \
FN(XDP) \
FN(TC_INGRESS) \
@@ -100,11 +112,13 @@
FN(VXLAN_VNI_NOT_FOUND) \
FN(MAC_INVALID_SOURCE) \
FN(VXLAN_ENTRY_EXISTS) \
- FN(VXLAN_NO_REMOTE) \
+ FN(NO_TX_TARGET) \
FN(IP_TUNNEL_ECN) \
FN(TUNNEL_TXINFO) \
FN(LOCAL_MAC) \
FN(ARP_PVLAN_DISABLE) \
+ FN(MAC_IEEE_MAC_CONTROL) \
+ FN(BRIDGE_INGRESS_STP_STATE) \
FNe(MAX)
/**
@@ -129,12 +143,27 @@ enum skb_drop_reason {
* 3) no valid child socket during 3WHS process
*/
SKB_DROP_REASON_NO_SOCKET,
+ /** @SKB_DROP_REASON_SOCKET_CLOSE: socket is close()d */
+ SKB_DROP_REASON_SOCKET_CLOSE,
+ /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
+ SKB_DROP_REASON_SOCKET_FILTER,
+ /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
+ SKB_DROP_REASON_SOCKET_RCVBUFF,
+ /**
+ * @SKB_DROP_REASON_UNIX_DISCONNECT: recv queue is purged when SOCK_DGRAM
+ * or SOCK_SEQPACKET socket re-connect()s to another socket or notices
+ * during send() that the peer has been close()d.
+ */
+ SKB_DROP_REASON_UNIX_DISCONNECT,
+ /**
+ * @SKB_DROP_REASON_UNIX_SKIP_OOB: Out-Of-Band data is skipped by
+ * recv() without MSG_OOB so dropped.
+ */
+ SKB_DROP_REASON_UNIX_SKIP_OOB,
/** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
SKB_DROP_REASON_PKT_TOO_SMALL,
/** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */
SKB_DROP_REASON_TCP_CSUM,
- /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */
- SKB_DROP_REASON_SOCKET_FILTER,
/** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */
SKB_DROP_REASON_UDP_CSUM,
/** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */
@@ -165,8 +194,6 @@ enum skb_drop_reason {
SKB_DROP_REASON_XFRM_POLICY,
/** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */
SKB_DROP_REASON_IP_NOPROTO,
- /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */
- SKB_DROP_REASON_SOCKET_RCVBUFF,
/**
* @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as
* udp packet drop out of udp_memory_allocated.
@@ -251,6 +278,18 @@ enum skb_drop_reason {
* LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK: PAWS check, old ACK packet.
+ * Corresponds to LINUX_MIB_PAWS_OLD_ACK.
+ */
+ SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK,
+ /**
+ * @SKB_DROP_REASON_TCP_RFC7323_TSECR: PAWS check, invalid TSEcr.
+ * Corresponds to LINUX_MIB_TSECRREJECTED.
+ */
+ SKB_DROP_REASON_TCP_RFC7323_TSECR,
+ /** @SKB_DROP_REASON_TCP_LISTEN_OVERFLOW: listener queue full. */
+ SKB_DROP_REASON_TCP_LISTEN_OVERFLOW,
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
@@ -312,6 +351,36 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_QDISC_DROP,
/**
+ * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
+ * instance exceeds its total buffer size limit.
+ */
+ SKB_DROP_REASON_QDISC_OVERLIMIT,
+ /**
+ * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm
+ * due to congestion.
+ */
+ SKB_DROP_REASON_QDISC_CONGESTED,
+ /**
+ * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of
+ * CAKE qdisc AQM algorithm (BLUE).
+ */
+ SKB_DROP_REASON_CAKE_FLOOD,
+ /**
+ * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band
+ * limit is reached.
+ */
+ SKB_DROP_REASON_FQ_BAND_LIMIT,
+ /**
+ * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet
+ * timestamp is too far in the future.
+ */
+ SKB_DROP_REASON_FQ_HORIZON_LIMIT,
+ /**
+ * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow
+ * exceeds its limits.
+ */
+ SKB_DROP_REASON_FQ_FLOW_LIMIT,
+ /**
* @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
* backlog queue. This can be caused by backlog queue full (see
* netdev_max_backlog in net.rst) or RPS flow limit
@@ -461,8 +530,8 @@ enum skb_drop_reason {
* entry or an entry pointing to a nexthop.
*/
SKB_DROP_REASON_VXLAN_ENTRY_EXISTS,
- /** @SKB_DROP_REASON_VXLAN_NO_REMOTE: no remote found for xmit */
- SKB_DROP_REASON_VXLAN_NO_REMOTE,
+ /** @SKB_DROP_REASON_NO_TX_TARGET: no target found for xmit */
+ SKB_DROP_REASON_NO_TX_TARGET,
/**
* @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to
* RFC 6040 4.2, see __INET_ECN_decapsulate() for detail.
@@ -485,6 +554,16 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_ARP_PVLAN_DISABLE,
/**
+ * @SKB_DROP_REASON_MAC_IEEE_MAC_CONTROL: the destination MAC address
+ * is an IEEE MAC Control address.
+ */
+ SKB_DROP_REASON_MAC_IEEE_MAC_CONTROL,
+ /**
+ * @SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE: the STP state of the
+ * ingress bridge port does not allow frames to be forwarded.
+ */
+ SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE,
+ /**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
*/
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 56cb7be92244..7d3b1a2a6fec 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -18,12 +18,6 @@ enum skb_drop_reason_subsys {
SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE,
/**
- * @SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR: mac80211 drop reasons
- * for frames still going to monitor, see net/mac80211/drop.h
- */
- SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR,
-
- /**
* @SKB_DROP_REASON_SUBSYS_OPENVSWITCH: openvswitch drop reasons,
* see net/openvswitch/drop.h
*/
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 72ae65e7246a..a0a9481c52c2 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -906,6 +906,8 @@ struct dsa_switch_ops {
void (*get_rmon_stats)(struct dsa_switch *ds, int port,
struct ethtool_rmon_stats *rmon_stats,
const struct ethtool_rmon_hist_range **ranges);
+ void (*get_ts_stats)(struct dsa_switch *ds, int port,
+ struct ethtool_ts_stats *ts_stats);
void (*get_stats64)(struct dsa_switch *ds, int port,
struct rtnl_link_stats64 *s);
void (*get_pause_stats)(struct dsa_switch *ds, int port,
@@ -988,10 +990,9 @@ struct dsa_switch_ops {
/*
* Port's MAC EEE settings
*/
+ bool (*support_eee)(struct dsa_switch *ds, int port);
int (*set_mac_eee)(struct dsa_switch *ds, int port,
struct ethtool_keee *e);
- int (*get_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_keee *e);
/* EEPROM access */
int (*get_eeprom_len)(struct dsa_switch *ds);
@@ -1383,5 +1384,6 @@ static inline bool dsa_user_dev_check(const struct net_device *dev)
netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
+bool dsa_supports_eee(struct dsa_switch *ds, int port);
#endif
diff --git a/include/net/dst.h b/include/net/dst.h
index 08647c99d79c..78c78cdce0e9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -307,7 +307,7 @@ static inline bool dst_hold_safe(struct dst_entry *dst)
* @skb: buffer
*
* If dst is not yet refcounted and not destroyed, grab a ref on it.
- * Returns true if dst is refcounted.
+ * Returns: true if dst is refcounted.
*/
static inline bool skb_dst_force(struct sk_buff *skb)
{
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 84c15402931c..4160731dcb6e 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -163,11 +163,8 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
if (!new_md)
return ERR_PTR(-ENOMEM);
- unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
- sizeof(struct ip_tunnel_info) + md_size,
- /* metadata_dst_alloc() reserves room (md_size bytes) for
- * options right after the ip_tunnel_info struct.
- */);
+ memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+ sizeof(struct ip_tunnel_info) + md_size);
#ifdef CONFIG_DST_CACHE
/* Unclone the dst cache if there is one */
if (new_md->u.tun_info.dst_cache.cache) {
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 04383d90a1e3..6e68e359ad18 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -43,6 +43,10 @@ struct fib_rule {
struct fib_kuid_range uid_range;
struct fib_rule_port_range sport_range;
struct fib_rule_port_range dport_range;
+ u16 sport_mask;
+ u16 dport_mask;
+ u8 iif_is_l3_master;
+ u8 oif_is_l3_master;
struct rcu_head rcu;
};
@@ -146,6 +150,17 @@ static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
ntohs(port) <= a->end;
}
+static inline bool fib_rule_port_match(const struct fib_rule_port_range *range,
+ u16 port_mask, __be16 port)
+{
+ if ((range->start ^ ntohs(port)) & port_mask)
+ return false;
+ if (!port_mask && fib_rule_port_range_set(range) &&
+ !fib_rule_port_inrange(range, port))
+ return false;
+ return true;
+}
+
static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
{
return a->start != 0 && a->end != 0 && a->end < 0xffff &&
@@ -159,6 +174,12 @@ static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
a->end == b->end;
}
+static inline bool
+fib_rule_port_is_range(const struct fib_rule_port_range *range)
+{
+ return range->start != range->end;
+}
+
static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
{
return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto ||
@@ -178,10 +199,10 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
struct netlink_ext_ack *extack);
unsigned int fib_rules_seq_read(const struct net *net, int family);
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack);
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack);
+int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held);
+int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, bool rtnl_held);
INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule,
struct flowi *fl, int flags));
diff --git a/include/net/flow.h b/include/net/flow.h
index 335bbc52171c..2a3f0c42f092 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -38,6 +38,7 @@ struct flowi_common {
__u8 flowic_flags;
#define FLOWI_FLAG_ANYSRC 0x01
#define FLOWI_FLAG_KNOWN_NH 0x02
+#define FLOWI_FLAG_L3MDEV_OIF 0x04
__u32 flowic_secid;
kuid_t flowic_uid;
__u32 flowic_multipath_hash;
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index d096cc6352de..a03d56765832 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -354,7 +354,7 @@ __genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags)
* such requests) or a struct initialized by genl_info_init_ntf()
* when constructing notifications.
*
- * Returns pointer to new genetlink header.
+ * Returns: pointer to new genetlink header.
*/
static inline void *
genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
@@ -366,7 +366,7 @@ genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
* genlmsg_nlhdr - Obtain netlink header from user specified header
* @user_hdr: user header as returned from genlmsg_put()
*
- * Returns pointer to netlink header.
+ * Returns: pointer to netlink header.
*/
static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr)
{
@@ -435,7 +435,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb,
* @flags: netlink message flags
* @cmd: generic netlink command
*
- * Returns pointer to user specific header
+ * Returns: pointer to user specific header
*/
static inline void *genlmsg_put_reply(struct sk_buff *skb,
struct genl_info *info,
diff --git a/include/net/gro.h b/include/net/gro.h
index 7b548f91754b..22d3a69e4404 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -509,28 +509,46 @@ static inline int gro_receive_network_flush(const void *th, const void *th2,
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
+void __gro_flush(struct gro_node *gro, bool flush_old);
+
+static inline void gro_flush(struct gro_node *gro, bool flush_old)
+{
+ if (!gro->bitmask)
+ return;
+
+ __gro_flush(gro, flush_old);
+}
+
+static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+{
+ gro_flush(&napi->gro, flush_old);
+}
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static inline void gro_normal_list(struct napi_struct *napi)
+static inline void gro_normal_list(struct gro_node *gro)
{
- if (!napi->rx_count)
+ if (!gro->rx_count)
return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
+ netif_receive_skb_list_internal(&gro->rx_list);
+ INIT_LIST_HEAD(&gro->rx_list);
+ gro->rx_count = 0;
}
/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
* pass the whole batch up to the stack.
*/
-static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
+static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb,
+ int segs)
{
- list_add_tail(&skb->list, &napi->rx_list);
- napi->rx_count += segs;
- if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
- gro_normal_list(napi);
+ list_add_tail(&skb->list, &gro->rx_list);
+ gro->rx_count += segs;
+ if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
+ gro_normal_list(gro);
}
+void gro_init(struct gro_node *gro);
+void gro_cleanup(struct gro_node *gro);
+
/* This function is the alternative of 'inet_iif' and 'inet_sdif'
* functions in case we can not rely on fields of IPCB.
*
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 30e9570beb2a..fda94b2647ff 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -23,7 +23,6 @@ struct net_hotdata {
struct net_offload udpv6_offload;
#endif
struct list_head offload_base;
- struct list_head ptype_all;
struct kmem_cache *skbuff_cache;
struct kmem_cache *skbuff_fclone_cache;
struct kmem_cache *skb_small_head_cache;
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 025bd8d3c769..745891d2e113 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -21,8 +21,6 @@ struct sockaddr;
struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6,
const struct request_sock *req, u8 proto);
-void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-
int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 74dd90ff5f12..c32878c69179 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -150,7 +150,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
int iif, int sdif,
bool *refcounted)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = dev_net_rcu(skb_dst(skb)->dev);
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct sock *sk;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c7f42844c79a..1735db332aab 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -44,12 +44,10 @@ struct inet_connection_sock_af_ops {
struct request_sock *req_unhash,
bool *own_req);
u16 net_header_len;
- u16 sockaddr_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
- void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
void (*mtu_reduced)(struct sock *sk);
};
@@ -58,7 +56,6 @@ struct inet_connection_sock_af_ops {
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_bind2_hash: Bind node in the bhash2 table
- * @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
* @icsk_pmtu_cookie Last pmtu seen by socket
@@ -66,7 +63,6 @@ struct inet_connection_sock_af_ops {
* @icsk_af_ops Operations which are AF_INET{4,6} specific
* @icsk_ulp_ops Pluggable ULP control hook
* @icsk_ulp_data ULP private data
- * @icsk_clean_acked Clean acked data hook
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
@@ -85,18 +81,17 @@ struct inet_connection_sock {
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
struct inet_bind2_bucket *icsk_bind2_hash;
- unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
__u32 icsk_rto_min;
+ u32 icsk_rto_max;
__u32 icsk_delack_max;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops *icsk_ulp_ops;
void __rcu *icsk_ulp_data;
- void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:5,
icsk_ca_initialized:1,
@@ -116,8 +111,8 @@ struct inet_connection_sock {
#define ATO_BITS 8
__u32 ato:ATO_BITS, /* Predicted tick of soft clock */
lrcv_flowlabel:20, /* last received ipv6 flowlabel */
- unused:4;
- unsigned long timeout; /* Currently scheduled timeout */
+ dst_quick_ack:1, /* cache dst RTAX_QUICKACK */
+ unused:3;
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
__u16 rcv_mss; /* MSS used for delayed ACK decisions */
@@ -189,8 +184,17 @@ static inline void inet_csk_delack_init(struct sock *sk)
memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
}
-void inet_csk_delete_keepalive_timer(struct sock *sk);
-void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
+static inline unsigned long
+icsk_timeout(const struct inet_connection_sock *icsk)
+{
+ return READ_ONCE(icsk->icsk_retransmit_timer.expires);
+}
+
+static inline unsigned long
+icsk_delack_timeout(const struct inet_connection_sock *icsk)
+{
+ return READ_ONCE(icsk->icsk_delack_timer.expires);
+}
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
@@ -227,16 +231,15 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
when = max_when;
}
+ when += jiffies;
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
smp_store_release(&icsk->icsk_pending, what);
- icsk->icsk_timeout = jiffies + when;
- sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
+ sk_reset_timer(sk, &icsk->icsk_retransmit_timer, when);
} else if (what == ICSK_TIME_DACK) {
smp_store_release(&icsk->icsk_ack.pending,
icsk->icsk_ack.pending | ICSK_ACK_TIMER);
- icsk->icsk_ack.timeout = jiffies + when;
- sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
+ sk_reset_timer(sk, &icsk->icsk_delack_timer, when);
} else {
pr_debug("inet_csk BUG: unknown timer value\n");
}
@@ -318,8 +321,6 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
int inet_csk_listen_start(struct sock *sk);
void inet_csk_listen_stop(struct sock *sk);
-void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-
/* update the fast reuse flag when adding a socket */
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct sock *sk);
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 5af6eb14c5db..0eccd9c3a883 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -137,7 +137,7 @@ static inline void fqdir_pre_exit(struct fqdir *fqdir)
}
void fqdir_exit(struct fqdir *fqdir);
-void inet_frag_kill(struct inet_frag_queue *q);
+void inet_frag_kill(struct inet_frag_queue *q, int *refs);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
@@ -145,9 +145,9 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
unsigned int inet_frag_rbtree_purge(struct rb_root *root,
enum skb_drop_reason reason);
-static inline void inet_frag_put(struct inet_frag_queue *q)
+static inline void inet_frag_putn(struct inet_frag_queue *q, int refs)
{
- if (refcount_dec_and_test(&q->refcnt))
+ if (refs && refcount_sub_and_test(refs, &q->refcnt))
inet_frag_destroy(q);
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5eea47f135a4..949641e92539 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -89,6 +89,7 @@ struct inet_bind_bucket {
bool fast_ipv6_only;
struct hlist_node node;
struct hlist_head bhash2;
+ struct rcu_head rcu;
};
struct inet_bind2_bucket {
@@ -226,8 +227,7 @@ struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
struct inet_bind_hashbucket *head,
const unsigned short snum, int l3mdev);
-void inet_bind_bucket_destroy(struct kmem_cache *cachep,
- struct inet_bind_bucket *tb);
+void inet_bind_bucket_destroy(struct inet_bind_bucket *tb);
bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
const struct net *net, unsigned short port,
@@ -492,7 +492,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
const int sdif,
bool *refcounted)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = dev_net_rcu(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
struct sock *sk;
@@ -527,9 +527,12 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u64 port_offset,
+ u32 hash_port0,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
- struct inet_timewait_sock **));
+ struct inet_timewait_sock **,
+ bool rcu_lookup,
+ u32 hash));
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 56d8bc5593d3..1086256549fa 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -19,6 +19,7 @@
#include <linux/netdevice.h>
#include <net/flow.h>
+#include <net/inet_dscp.h>
#include <net/sock.h>
#include <net/request_sock.h>
#include <net/netns/hash.h>
@@ -172,7 +173,7 @@ struct inet_cork {
u8 tx_flags;
__u8 ttl;
__s16 tos;
- char priority;
+ u32 priority;
__u16 gso_size;
u32 ts_opt_id;
u64 transmit_time;
@@ -302,6 +303,11 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL;
}
+static inline dscp_t inet_sk_dscp(const struct inet_sock *inet)
+{
+ return inet_dsfield_to_dscp(READ_ONCE(inet->tos));
+}
+
#define inet_test_bit(nr, sk) \
test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
#define inet_set_bit(nr, sk) \
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 62c0a7e65d6b..67a313575780 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -74,6 +74,10 @@ struct inet_timewait_sock {
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
+ /**
+ * @tw_reuse_stamp: Time of entry into %TCP_TIME_WAIT state in msec.
+ */
+ u32 tw_entry_stamp;
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
struct inet_bind2_bucket *tw_tb2;
diff --git a/include/net/ip.h b/include/net/ip.h
index 23ecb10945b0..47ed6d23853d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -81,7 +81,6 @@ struct ipcm_cookie {
__u8 protocol;
__u8 ttl;
__s16 tos;
- char priority;
__u16 gso_size;
};
@@ -93,10 +92,12 @@ static inline void ipcm_init(struct ipcm_cookie *ipcm)
static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
const struct inet_sock *inet)
{
- ipcm_init(ipcm);
+ *ipcm = (struct ipcm_cookie) {
+ .tos = READ_ONCE(inet->tos),
+ };
+
+ sockcm_init(&ipcm->sockc, &inet->sk);
- ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
- ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
ipcm->protocol = inet->inet_num;
@@ -257,13 +258,6 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
return RT_SCOPE_UNIVERSE;
}
-static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
-{
- u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos);
-
- return dsfield & INET_DSCP_MASK;
-}
-
/* datagram.c */
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
@@ -363,7 +357,7 @@ static inline void inet_get_local_port_range(const struct net *net, int *low, in
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
#ifdef CONFIG_SYSCTL
-static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
+static inline bool inet_is_local_reserved_port(const struct net *net, unsigned short port)
{
if (!net->ipv4.sysctl_local_reserved_ports)
return false;
@@ -692,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
#endif
+#if IS_MODULE(CONFIG_IPV6)
+#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
+#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
+#else
+#define EXPORT_IPV6_MOD(X)
+#define EXPORT_IPV6_MOD_GPL(X)
+#endif
+
static inline unsigned int ipv4_addr_hash(__be32 ip)
{
return (__force unsigned int) ip;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a113c11ab56b..e3864b74e92a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -162,6 +162,8 @@ struct fib_info {
struct fib_nh fib_nh[] __counted_by(fib_nhs);
};
+int __net_init fib4_semantics_init(struct net *net);
+void __net_exit fib4_semantics_exit(struct net *net);
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rule;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1aa31bdb2b31..a36a335cef9f 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -95,8 +95,8 @@ struct ip_tunnel_encap {
#define ip_tunnel_info_opts(info) \
_Generic(info, \
- const struct ip_tunnel_info * : ((const void *)((info) + 1)),\
- struct ip_tunnel_info * : ((void *)((info) + 1))\
+ const struct ip_tunnel_info * : ((const void *)(info)->options),\
+ struct ip_tunnel_info * : ((void *)(info)->options)\
)
struct ip_tunnel_info {
@@ -107,6 +107,7 @@ struct ip_tunnel_info {
#endif
u8 options_len;
u8 mode;
+ u8 options[] __aligned_largest __counted_by(options_len);
};
/* 6rd prefix/relay information */
@@ -406,8 +407,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
bool log_ecn_error);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm_kern *p, __u32 fwmark);
-int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm_kern *p, __u32 fwmark);
+int ip_tunnel_newlink(struct net *net, struct net_device *dev,
+ struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
+ __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
@@ -650,7 +652,7 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
static inline void ip_tunnel_info_opts_get(void *to,
const struct ip_tunnel_info *info)
{
- memcpy(to, info + 1, info->options_len);
+ memcpy(to, ip_tunnel_info_opts(info), info->options_len);
}
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index 8660a2a6d1fc..51401f01e2a5 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -3,20 +3,9 @@
#define _NET_IPCOMP_H
#include <linux/skbuff.h>
-#include <linux/types.h>
-
-#define IPCOMP_SCRATCH_SIZE 65400
-
-struct crypto_comp;
-struct ip_comp_hdr;
-
-struct ipcomp_data {
- u16 threshold;
- struct crypto_comp * __percpu *tfms;
-};
struct ip_comp_hdr;
-struct sk_buff;
+struct netlink_ext_ack;
struct xfrm_state;
int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 248bfb26e2af..2ccdf85f34f1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -246,17 +246,20 @@ extern int sysctl_mld_qrv;
#define _DEVADD(net, statname, mod, idev, field, val) \
({ \
struct inet6_dev *_idev = (idev); \
+ unsigned long _field = (field); \
+ unsigned long _val = (val); \
if (likely(_idev != NULL)) \
- mod##SNMP_ADD_STATS((_idev)->stats.statname, (field), (val)); \
- mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, (field), (val));\
+ mod##SNMP_ADD_STATS((_idev)->stats.statname, _field, _val); \
+ mod##SNMP_ADD_STATS((net)->mib.statname##_statistics, _field, _val);\
})
#define _DEVUPD(net, statname, mod, idev, field, val) \
({ \
struct inet6_dev *_idev = (idev); \
+ unsigned long _val = (val); \
if (likely(_idev != NULL)) \
- mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, (val)); \
- mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, (val));\
+ mod##SNMP_UPD_PO_STATS((_idev)->stats.statname, field, _val); \
+ mod##SNMP_UPD_PO_STATS((net)->mib.statname##_statistics, field, _val);\
})
/* MIBs */
@@ -363,15 +366,6 @@ struct ipcm6_cookie {
struct ipv6_txoptions *opt;
};
-static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
-{
- *ipc6 = (struct ipcm6_cookie) {
- .hlimit = -1,
- .tclass = -1,
- .dontfrag = -1,
- };
-}
-
static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
const struct sock *sk)
{
@@ -380,6 +374,8 @@ static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
.tclass = inet6_sk(sk)->tclass,
.dontfrag = inet6_test_bit(DONTFRAG, sk),
};
+
+ sockcm_init(&ipc6->sockc, sk);
}
static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
@@ -471,7 +467,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
/* This helper is specialized for BIG TCP needs.
* It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
* It assumes headers are already in skb->head.
- * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there.
+ * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there.
*/
static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
{
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 7321ffe3a108..38ef66826939 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -66,6 +66,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
struct sk_buff *head;
+ int refs = 1;
rcu_read_lock();
/* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
@@ -77,7 +78,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
goto out;
fq->q.flags |= INET_FRAG_DROP;
- inet_frag_kill(&fq->q);
+ inet_frag_kill(&fq->q, &refs);
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
@@ -109,7 +110,7 @@ out:
spin_unlock(&fq->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- inet_frag_put(&fq->q);
+ inet_frag_putn(&fq->q, refs);
}
/* Check if the upper layer header is truncated in the first fragment. */
diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
index dd9e93c12260..9804fa5d9c67 100644
--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -202,7 +202,7 @@ struct iucv_handler {
*
* Registers a driver with IUCV.
*
- * Returns 0 on success, -ENOMEM if the memory allocation for the pathid
+ * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid
* table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus.
*/
int iucv_register(struct iucv_handler *handler, int smp);
@@ -224,7 +224,7 @@ void iucv_unregister(struct iucv_handler *handle, int smp);
*
* Allocate a new path structure for use with iucv_connect.
*
- * Returns NULL if the memory allocation failed or a pointer to the
+ * Returns: NULL if the memory allocation failed or a pointer to the
* path structure.
*/
static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp)
@@ -260,7 +260,7 @@ static inline void iucv_path_free(struct iucv_path *path)
* This function is issued after the user received a connection pending
* external interrupt and now wishes to complete the IUCV communication path.
*
- * Returns the result of the CP IUCV call.
+ * Returns: the result of the CP IUCV call.
*/
int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
u8 *userdata, void *private);
@@ -278,7 +278,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
* successfully, you are not able to use the path until you receive an IUCV
* Connection Complete external interrupt.
*
- * Returns the result of the CP IUCV call.
+ * Returns: the result of the CP IUCV call.
*/
int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
u8 *userid, u8 *system, u8 *userdata,
@@ -292,7 +292,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
* This function temporarily suspends incoming messages on an IUCV path.
* You can later reactivate the path by invoking the iucv_resume function.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_quiesce(struct iucv_path *path, u8 *userdata);
@@ -304,7 +304,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata);
* This function resumes incoming messages on an IUCV path that has
* been stopped with iucv_path_quiesce.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_resume(struct iucv_path *path, u8 *userdata);
@@ -315,7 +315,7 @@ int iucv_path_resume(struct iucv_path *path, u8 *userdata);
*
* This function terminates an IUCV path.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_path_sever(struct iucv_path *path, u8 *userdata);
@@ -327,7 +327,7 @@ int iucv_path_sever(struct iucv_path *path, u8 *userdata);
*
* Cancels a message you have sent.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
u32 srccls);
@@ -347,7 +347,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: local_bh_enable/local_bh_disable
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size, size_t *residual);
@@ -367,7 +367,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: no locking.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *buffer, size_t size,
@@ -382,7 +382,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
* are notified of a message and the time that you complete the message,
* the message may be rejected.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
@@ -399,7 +399,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg);
* pathid, msgid, and trgcls. Prmmsg signifies the data is moved into
* the parameter list.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
u8 flags, void *reply, size_t size);
@@ -419,7 +419,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: local_bh_enable/local_bh_disable
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
@@ -439,7 +439,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
*
* Locking: no locking.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size);
@@ -461,7 +461,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
* reply to the message and a buffer is provided into which IUCV moves
* the reply to this message.
*
- * Returns the result from the CP IUCV call.
+ * Returns: the result from the CP IUCV call.
*/
int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
u8 flags, u32 srccls, void *buffer, size_t size,
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index f7fe796e8429..1eb8dad18f7e 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg);
+static inline
+bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
+{
+ return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) &&
+ fl->flowi_l3mdev == iifindex;
+}
+
+static inline
+bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
+{
+ return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF &&
+ fl->flowi_l3mdev == oifindex;
+}
+
void l3mdev_update_flow(struct net *net, struct flowi *fl);
int l3mdev_master_ifindex_rcu(const struct net_device *dev);
@@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
{
return 1;
}
+
+static inline
+bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex)
+{
+ return false;
+}
+
+static inline
+bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex)
+{
+ return false;
+}
+
static inline
void l3mdev_update_flow(struct net *net, struct flowi *fl)
{
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
index 43574bd6612f..ab05024be518 100644
--- a/include/net/libeth/rx.h
+++ b/include/net/libeth/rx.h
@@ -198,6 +198,53 @@ struct libeth_rx_pt {
enum xdp_rss_hash_type hash_type:16;
};
+/**
+ * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
+ * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
+ * @ipe: IP checksum error
+ * @eipe: external (outermost) IP header (only for tunels)
+ * @eudpe: external (outermost) UDP checksum error (only for tunels)
+ * @ipv6exadd: IPv6 header with extension headers
+ * @l4e: L4 integrity error
+ * @pprs: set for packets that skip checksum calculation in the HW pre parser
+ * @nat: the packet is a UDP tunneled packet
+ * @raw_csum_valid: set if raw checksum is valid
+ * @pad: padding to naturally align raw_csum field
+ * @raw_csum: raw checksum
+ */
+struct libeth_rx_csum {
+ u32 l3l4p:1;
+ u32 ipe:1;
+ u32 eipe:1;
+ u32 eudpe:1;
+ u32 ipv6exadd:1;
+ u32 l4e:1;
+ u32 pprs:1;
+ u32 nat:1;
+
+ u32 raw_csum_valid:1;
+ u32 pad:7;
+ u32 raw_csum:16;
+};
+
+/**
+ * struct libeth_rqe_info - receive queue element info
+ * @len: packet length
+ * @ptype: packet type based on types programmed into the device
+ * @eop: whether it's the last fragment of the packet
+ * @rxe: MAC errors: CRC, Alignment, Oversize, Undersizes, Length error
+ * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
+ */
+struct libeth_rqe_info {
+ u32 len;
+
+ u32 ptype:14;
+ u32 eop:1;
+ u32 rxe:1;
+
+ u32 vlan:16;
+};
+
void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
/**
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 53bd2d02a4f0..39cd50300a18 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -116,9 +116,11 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
unsigned int num);
int lwtunnel_valid_encap_type(u16 encap_type,
- struct netlink_ext_ack *extack);
+ struct netlink_ext_ack *extack,
+ bool rtnl_is_held);
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
- struct netlink_ext_ack *extack);
+ struct netlink_ext_ack *extack,
+ bool rtnl_is_held);
int lwtunnel_build_state(struct net *net, u16 encap_type,
struct nlattr *encap,
unsigned int family, const void *cfg,
@@ -201,13 +203,15 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
}
static inline int lwtunnel_valid_encap_type(u16 encap_type,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ bool rtnl_is_held)
{
NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel");
return -EOPNOTSUPP;
}
static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ bool rtnl_is_held)
{
/* return 0 since we are not walking attr looking for
* RTA_ENCAP_TYPE attribute on nexthops.
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ab8dce1f2c27..5349df596157 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2024 Intel Corporation
+ * Copyright (C) 2018 - 2025 Intel Corporation
*/
#ifndef MAC80211_H
@@ -702,6 +702,7 @@ struct ieee80211_parsed_tpe {
* @tpe: transmit power envelope information
* @pwr_reduction: power constraint of BSS.
* @eht_support: does this BSS support EHT
+ * @epcs_support: does this BSS support EPCS
* @csa_active: marks whether a channel switch is going on.
* @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @chanctx_conf: The channel context this interface is assigned to, or %NULL
@@ -823,7 +824,7 @@ struct ieee80211_bss_conf {
u8 pwr_reduction;
bool eht_support;
-
+ bool epcs_support;
bool csa_active;
bool mu_mimo_owner;
@@ -1855,6 +1856,9 @@ struct ieee80211_channel_switch {
* operation on this interface and request a channel context without
* the AP definition. Use this e.g. because the device is able to
* handle OFDMA (downlink and trigger for uplink) on a per-AP basis.
+ * @IEEE80211_VIF_REMOVE_AP_AFTER_DISASSOC: indicates that the AP sta should
+ * be removed only after setting the vif as unassociated, and not the
+ * opposite. Only relevant for STA vifs.
*/
enum ieee80211_vif_flags {
IEEE80211_VIF_BEACON_FILTER = BIT(0),
@@ -1863,6 +1867,7 @@ enum ieee80211_vif_flags {
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
IEEE80211_VIF_EML_ACTIVE = BIT(4),
IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW = BIT(5),
+ IEEE80211_VIF_REMOVE_AP_AFTER_DISASSOC = BIT(6),
};
@@ -2216,7 +2221,7 @@ enum ieee80211_key_flags {
* @tx_pn: PN used for TX keys, may be used by the driver as well if it
* needs to do software PN assignment by itself (e.g. due to TSO)
* @flags: key flags, see &enum ieee80211_key_flags.
- * @keyidx: the key index (0-3)
+ * @keyidx: the key index (0-7)
* @keylen: key material length
* @key: key material. For ALG_TKIP the key is encoded as a 256-bit (32 byte)
* data block:
@@ -2225,7 +2230,7 @@ enum ieee80211_key_flags {
* - Temporal Authenticator Rx MIC Key (64 bits)
* @icv_len: The ICV length for this key type
* @iv_len: The IV length for this key type
- * @link_id: the link ID for MLO, or -1 for non-MLO or pairwise keys
+ * @link_id: the link ID, 0 for non-MLO, or -1 for pairwise keys
*/
struct ieee80211_key_conf {
atomic64_t tx_pn;
@@ -2336,6 +2341,8 @@ enum ieee80211_sta_rx_bandwidth {
IEEE80211_STA_RX_BW_320,
};
+#define IEEE80211_STA_RX_BW_MAX IEEE80211_STA_RX_BW_320
+
/**
* struct ieee80211_sta_rates - station rate selection table
*
@@ -2845,6 +2852,11 @@ struct ieee80211_txq {
* implements MLO, so operation can continue on other links when one
* link is switching.
*
+ * @IEEE80211_HW_STRICT: strictly enforce certain things mandated by the spec
+ * but otherwise ignored/worked around for interoperability. This is a
+ * HW flag so drivers can opt in according to their own control, e.g. in
+ * testing.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2905,6 +2917,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_DISALLOW_PUNCTURING,
IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ,
IEEE80211_HW_HANDLES_QUIET_CSA,
+ IEEE80211_HW_STRICT,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -3817,7 +3830,7 @@ enum ieee80211_reconfig_type {
* @was_assoc: set if this call is due to deauth/disassoc
* while just having been associated
* @link_id: the link id on which the frame will be TX'ed.
- * Only used with the mgd_prepare_tx() method.
+ * 0 for a non-MLO connection.
*/
struct ieee80211_prep_tx_info {
u16 duration;
@@ -4762,7 +4775,7 @@ struct ieee80211_ops {
u32 (*get_expected_throughput)(struct ieee80211_hw *hw,
struct ieee80211_sta *sta);
int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
- int *dbm);
+ unsigned int link_id, int *dbm);
int (*tdls_channel_switch)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -5334,22 +5347,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
int max_rates);
/**
- * ieee80211_sta_set_expected_throughput - set the expected tpt for a station
- *
- * Call this function to notify mac80211 about a change in expected throughput
- * to a station. A driver for a device that does rate control in firmware can
- * call this function when the expected throughput estimate towards a station
- * changes. The information is used to tune the CoDel AQM applied to traffic
- * going towards that station (which can otherwise be too aggressive and cause
- * slow stations to starve).
- *
- * @pubsta: the station to set throughput for.
- * @thr: the current expected throughput in kbps.
- */
-void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
- u32 thr);
-
-/**
* ieee80211_tx_rate_update - transmit rate update callback
*
* Drivers should call this functions with a non-NULL pub sta
@@ -6659,6 +6656,31 @@ void ieee80211_iter_chan_contexts_atomic(
void *iter_data);
/**
+ * ieee80211_iter_chan_contexts_mtx - iterate channel contexts
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @iter: iterator function
+ * @iter_data: data passed to iterator function
+ *
+ * Iterate all active channel contexts. This function can only be used while
+ * holding the wiphy mutex.
+ *
+ * The iterator will not find a context that's being added (during
+ * the driver callback to add it) but will find it while it's being
+ * removed.
+ *
+ * Note that during hardware restart, all contexts that existed
+ * before the restart are considered already present so will be
+ * found while iterating, whether they've been re-added already
+ * or not.
+ */
+void ieee80211_iter_chan_contexts_mtx(
+ struct ieee80211_hw *hw,
+ void (*iter)(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ void *data),
+ void *iter_data);
+
+/**
* ieee80211_ap_probereq_get - retrieve a Probe Request template
* @hw: pointer obtained from ieee80211_alloc_hw().
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
@@ -7734,6 +7756,50 @@ ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
}
}
+/**
+ * ieee80211_prepare_rx_omi_bw - prepare for sending BW RX OMI
+ * @link_sta: the link STA the OMI is going to be sent to
+ * @bw: the bandwidth requested
+ *
+ * When the driver decides to do RX OMI to change bandwidth with a STA
+ * it calls this function to prepare, then sends the OMI, and finally
+ * calls ieee80211_finalize_rx_omi_bw().
+ *
+ * Note that the (link) STA rate control is updated accordingly as well,
+ * but the chanctx might not be updated if there are other users.
+ * If the intention is to reduce the listen bandwidth, the driver must
+ * ensure there are no TDLS stations nor other uses of the chanctx.
+ *
+ * Also note that in order to sequence correctly, narrowing bandwidth
+ * will only happen in ieee80211_finalize_rx_omi_bw(), whereas widening
+ * again (e.g. going back to normal) will happen here.
+ *
+ * Note that we treat this symmetrically, so if the driver calls this
+ * and tells the peer to only send with a lower bandwidth, we assume
+ * that the driver also wants to only send at that lower bandwidth, to
+ * allow narrowing of the chanctx request for this station/interface.
+ *
+ * Finally, the driver must ensure that if the function returned %true,
+ * ieee80211_finalize_rx_omi_bw() is also called, even for example in
+ * case of HW restart.
+ *
+ * Context: Must be called with wiphy mutex held, and will call back
+ * into the driver, so ensure no driver locks are held.
+ *
+ * Return: %true if changes are going to be made, %false otherwise
+ */
+bool ieee80211_prepare_rx_omi_bw(struct ieee80211_link_sta *link_sta,
+ enum ieee80211_sta_rx_bandwidth bw);
+
+/**
+ * ieee80211_finalize_rx_omi_bw - finalize BW RX OMI update
+ * @link_sta: the link STA the OMI was sent to
+ *
+ * See ieee80211_client_prepare_rx_omi_bw(). Context is the same here
+ * as well.
+ */
+void ieee80211_finalize_rx_omi_bw(struct ieee80211_link_sta *link_sta);
+
/* for older drivers - let's not document these ... */
int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx);
diff --git a/include/net/macsec.h b/include/net/macsec.h
index de216cbc6b05..bc7de5b53e54 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -38,8 +38,8 @@ struct metadata_dst;
typedef union salt {
struct {
- u32 ssci;
- u64 pn;
+ ssci_t ssci;
+ __be64 pn;
} __packed;
u8 bytes[MACSEC_SALT_LEN];
} __packed salt_t;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 90f56656b572..228603bf03f2 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -152,6 +152,7 @@ struct gdma_general_req {
#define GDMA_MESSAGE_V1 1
#define GDMA_MESSAGE_V2 2
#define GDMA_MESSAGE_V3 3
+#define GDMA_MESSAGE_V4 4
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
@@ -408,8 +409,6 @@ struct gdma_context {
struct gdma_dev mana_ib;
};
-#define MAX_NUM_GDMA_DEVICES 4
-
static inline bool mana_gd_is_mana(struct gdma_dev *gd)
{
return gd->dev_id.type == GDMA_DEVICE_MANA;
@@ -556,11 +555,15 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
+/* Driver can handle holes (zeros) in the device list */
+#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
- GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT)
+ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
+ GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
#define GDMA_DRV_CAP_FLAGS2 0
@@ -621,11 +624,12 @@ struct gdma_query_max_resources_resp {
}; /* HW DATA */
/* GDMA_LIST_DEVICES */
+#define GDMA_DEV_LIST_SIZE 64
struct gdma_list_devices_resp {
struct gdma_resp_hdr hdr;
u32 num_of_devs;
u32 reserved;
- struct gdma_dev_id devs[64];
+ struct gdma_dev_id devs[GDMA_DEV_LIST_SIZE];
}; /* HW DATA */
/* GDMA_REGISTER_DEVICE */
@@ -775,6 +779,7 @@ struct gdma_destroy_dma_region_req {
enum gdma_pd_flags {
GDMA_PD_FLAG_INVALID = 0,
+ GDMA_PD_FLAG_ALLOW_GPA_MR = 1,
};
struct gdma_create_pd_req {
@@ -800,6 +805,11 @@ struct gdma_destory_pd_resp {
};/* HW DATA */
enum gdma_mr_type {
+ /*
+ * Guest Physical Address - MRs of this type allow access
+ * to any DMA-mapped memory using bus-logical address
+ */
+ GDMA_MR_TYPE_GPA = 1,
/* Guest Virtual Address - MRs of this type allow access
* to memory mapped by PTEs associated with this MR using a virtual
* address that is set up in the MST
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0d00b24eacaf..0f78065de8fe 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -827,5 +827,7 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
u32 doorbell_pg_id);
void mana_uncfg_vport(struct mana_port_context *apc);
-struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index);
+struct net_device *mana_get_primary_netdev(struct mana_context *ac,
+ u32 port_index,
+ netdevice_tracker *tracker);
#endif /* _MANA_H */
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 1ecbff7116f6..07d458990113 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -212,7 +212,7 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
BUILD_BUG_ON(sizeof(struct mctp_skb_cb) > sizeof(skb->cb));
WARN_ON(cb->magic != 0x4d435450);
- return (void *)(skb->cb);
+ return cb;
}
/* If CONFIG_MCTP_FLOWS, we may add one of these as a SKB extension,
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 814b5f2e3ed5..bfbad695951c 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -14,6 +14,7 @@
struct mptcp_info;
struct mptcp_sock;
+struct mptcp_pm_addr_entry;
struct seq_file;
/* MPTCP sk_buff extension data */
@@ -103,13 +104,14 @@ struct mptcp_out_options {
#define MPTCP_SUBFLOWS_MAX 8
struct mptcp_sched_data {
- bool reinject;
u8 subflows;
struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
};
struct mptcp_sched_ops {
- int (*get_subflow)(struct mptcp_sock *msk,
+ int (*get_send)(struct mptcp_sock *msk,
+ struct mptcp_sched_data *data);
+ int (*get_retrans)(struct mptcp_sock *msk,
struct mptcp_sched_data *data);
char name[MPTCP_SCHED_NAME_MAX];
@@ -120,6 +122,19 @@ struct mptcp_sched_ops {
void (*release)(struct mptcp_sock *msk);
} ____cacheline_aligned_in_smp;
+#define MPTCP_PM_NAME_MAX 16
+#define MPTCP_PM_MAX 128
+#define MPTCP_PM_BUF_MAX (MPTCP_PM_NAME_MAX * MPTCP_PM_MAX)
+
+struct mptcp_pm_ops {
+ char name[MPTCP_PM_NAME_MAX];
+ struct module *owner;
+ struct list_head list;
+
+ void (*init)(struct mptcp_sock *msk);
+ void (*release)(struct mptcp_sock *msk);
+} ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_MPTCP
void mptcp_init(void);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 47181fd749b2..bd57d8fb54f1 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -83,6 +83,9 @@ struct net {
struct llist_node defer_free_list;
struct llist_node cleanup_list; /* namespaces on death row */
+ struct list_head ptype_all;
+ struct list_head ptype_specific;
+
#ifdef CONFIG_KEYS
struct key_tag *key_domain; /* Key domain of operation tag */
#endif
@@ -210,6 +213,8 @@ void net_ns_barrier(void);
struct ns_common *get_net_ns(struct ns_common *ns);
struct net *get_net_ns_by_fd(int fd);
+extern struct task_struct *cleanup_net_task;
+
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h
new file mode 100644
index 000000000000..0ee5bc766810
--- /dev/null
+++ b/include/net/netdev_lock.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _NET_NETDEV_LOCK_H
+#define _NET_NETDEV_LOCK_H
+
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+
+static inline bool netdev_trylock(struct net_device *dev)
+{
+ return mutex_trylock(&dev->lock);
+}
+
+static inline void netdev_assert_locked(const struct net_device *dev)
+{
+ lockdep_assert_held(&dev->lock);
+}
+
+static inline void
+netdev_assert_locked_or_invisible(const struct net_device *dev)
+{
+ if (dev->reg_state == NETREG_REGISTERED ||
+ dev->reg_state == NETREG_UNREGISTERING)
+ netdev_assert_locked(dev);
+}
+
+static inline bool netdev_need_ops_lock(const struct net_device *dev)
+{
+ bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops;
+
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+ ret |= !!dev->netdev_ops->net_shaper_ops;
+#endif
+
+ return ret;
+}
+
+static inline void netdev_lock_ops(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+}
+
+static inline void netdev_unlock_ops(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+}
+
+static inline void netdev_ops_assert_locked(const struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ lockdep_assert_held(&dev->lock);
+ else
+ ASSERT_RTNL();
+}
+
+static inline void
+netdev_ops_assert_locked_or_invisible(const struct net_device *dev)
+{
+ if (dev->reg_state == NETREG_REGISTERED ||
+ dev->reg_state == NETREG_UNREGISTERING)
+ netdev_ops_assert_locked(dev);
+}
+
+static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
+{
+ /* Only lower devices currently grab the instance lock, so no
+ * real ordering issues can occur. In the near future, only
+ * hardware devices will grab instance lock which also does not
+ * involve any ordering. Suppress lockdep ordering warnings
+ * until (if) we start grabbing instance lock on pure SW
+ * devices (bond/team/veth/etc).
+ */
+ if (a == b)
+ return 0;
+ return -1;
+}
+
+#define netdev_lockdep_set_classes(dev) \
+{ \
+ static struct lock_class_key qdisc_tx_busylock_key; \
+ static struct lock_class_key qdisc_xmit_lock_key; \
+ static struct lock_class_key dev_addr_list_lock_key; \
+ static struct lock_class_key dev_instance_lock_key; \
+ unsigned int i; \
+ \
+ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
+ lockdep_set_class(&(dev)->addr_list_lock, \
+ &dev_addr_list_lock_key); \
+ lockdep_set_class(&(dev)->lock, \
+ &dev_instance_lock_key); \
+ lock_set_cmp_fn(&dev->lock, netdev_lock_cmp_fn, NULL); \
+ for (i = 0; i < (dev)->num_tx_queues; i++) \
+ lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \
+ &qdisc_xmit_lock_key); \
+}
+
+#define netdev_lock_dereference(p, dev) \
+ rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock))
+
+int netdev_debug_event(struct notifier_block *nb, unsigned long event,
+ void *ptr);
+
+#endif
diff --git a/include/net/netdev_netlink.h b/include/net/netdev_netlink.h
new file mode 100644
index 000000000000..075962dbe743
--- /dev/null
+++ b/include/net/netdev_netlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_NETDEV_NETLINK_H
+#define __NET_NETDEV_NETLINK_H
+
+#include <linux/list.h>
+
+struct netdev_nl_sock {
+ struct mutex lock;
+ struct list_head bindings;
+};
+
+#endif /* __NET_NETDEV_NETLINK_H */
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 5ca019d294ca..d01c82983b4d 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -4,6 +4,16 @@
#include <linux/netdevice.h>
+/**
+ * struct netdev_config - queue-related configuration for a netdev
+ * @hds_thresh: HDS Threshold value.
+ * @hds_config: HDS value from userspace.
+ */
+struct netdev_config {
+ u32 hds_thresh;
+ u8 hds_config;
+};
+
/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
u64 bytes;
@@ -13,6 +23,7 @@ struct netdev_queue_stats_rx {
u64 hw_drops;
u64 hw_drop_overruns;
+ u64 csum_complete;
u64 csum_unnecessary;
u64 csum_none;
u64 csum_bad;
@@ -92,6 +103,12 @@ struct netdev_stat_ops {
struct netdev_queue_stats_tx *tx);
};
+void netdev_stat_queue_sum(struct net_device *netdev,
+ int rx_start, int rx_end,
+ struct netdev_queue_stats_rx *rx_sum,
+ int tx_start, int tx_end,
+ struct netdev_queue_stats_tx *tx_sum);
+
/**
* struct netdev_queue_mgmt_ops - netdev ops for queue management
*
@@ -107,6 +124,10 @@ struct netdev_stat_ops {
*
* @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
* queue's memory is written at the specified address.
+ *
+ * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
+ * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
+ * be called for an interface which is open.
*/
struct netdev_queue_mgmt_ops {
size_t ndo_queue_mem_size;
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index 596836abf7bf..b2238b551dce 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -16,6 +16,7 @@ struct netdev_rx_queue {
struct rps_dev_flow_table __rcu *rps_flow_table;
#endif
struct kobject kobj;
+ const struct attribute_group **groups;
struct net_device *dev;
netdevice_tracker dev_tracker;
@@ -23,7 +24,7 @@ struct netdev_rx_queue {
struct xsk_buff_pool *pool;
#endif
/* NAPI instance for the queue
- * Readers and writers must hold RTNL
+ * "ops protected", see comment about net_device::lock
*/
struct napi_struct *napi;
struct pp_memory_provider_params mp_params;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index cba3ccf03fcc..3f02a45773e8 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -204,8 +204,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple);
void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- u32 extra_jiffies, bool do_acct);
+ u32 extra_jiffies, unsigned int bytes);
/* Refresh conntrack for this many jiffies and do accounting */
static inline void nf_ct_refresh_acct(struct nf_conn *ct,
@@ -213,15 +212,14 @@ static inline void nf_ct_refresh_acct(struct nf_conn *ct,
const struct sk_buff *skb,
u32 extra_jiffies)
{
- __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true);
+ __nf_ct_refresh_acct(ct, ctinfo, extra_jiffies, skb->len);
}
/* Refresh conntrack for this many jiffies */
static inline void nf_ct_refresh(struct nf_conn *ct,
- const struct sk_buff *skb,
u32 extra_jiffies)
{
- __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false);
+ __nf_ct_refresh_acct(ct, 0, extra_jiffies, 0);
}
/* kill conntrack and do accounting */
@@ -314,16 +312,6 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
#define NF_CT_DAY (86400 * HZ)
-/* Set an arbitrary timeout large enough not to ever expire, this save
- * us a check for the IPS_OFFLOAD_BIT from the packet path via
- * nf_ct_is_expired().
- */
-static inline void nf_ct_offload_timeout(struct nf_conn *ct)
-{
- if (nf_ct_expires(ct) < NF_CT_DAY / 2)
- WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
-}
-
struct kernel_param;
int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 0c1dac318e02..8dcf7c371ee9 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -12,6 +12,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack_extend.h>
+#include <asm/local64.h>
enum nf_ct_ecache_state {
NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */
@@ -20,6 +21,9 @@ enum nf_ct_ecache_state {
struct nf_conntrack_ecache {
unsigned long cache; /* bitops want long */
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ local64_t timestamp; /* event timestamp, in nanoseconds */
+#endif
u16 ctmask; /* bitmask of ct events to be delivered */
u16 expmask; /* bitmask of expect events to be delivered */
u32 missed; /* missed events */
@@ -108,6 +112,14 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
if (e == NULL)
return;
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ /* renew only if this is the first cached event, so that the
+ * timestamp reflects the first, not the last, generated event.
+ */
+ if (local64_read(&e->timestamp) && READ_ONCE(e->cache) == 0)
+ local64_set(&e->timestamp, ktime_get_real_ns());
+#endif
+
set_bit(event, &e->cache);
#endif
}
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index b63d53bb9dd6..d711642e78b5 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -163,6 +163,7 @@ struct flow_offload_tuple_rhash {
enum nf_flow_flags {
NF_FLOW_SNAT,
NF_FLOW_DNAT,
+ NF_FLOW_CLOSING,
NF_FLOW_TEARDOWN,
NF_FLOW_HW,
NF_FLOW_HW_DYING,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f6958118986a..803d5f1601f9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1201,6 +1201,8 @@ struct nft_hook {
struct list_head list;
struct nf_hook_ops ops;
struct rcu_head rcu;
+ char ifname[IFNAMSIZ];
+ u8 ifnamelen;
};
/**
@@ -1236,8 +1238,6 @@ static inline bool nft_is_base_chain(const struct nft_chain *chain)
return chain->flags & NFT_CHAIN_BASE;
}
-int __nft_release_basechain(struct nft_ctx *ctx);
-
unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
static inline bool nft_use_inc(u32 *use)
@@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void);
void __init nft_chain_route_init(void);
void nft_chain_route_fini(void);
-void nf_tables_trans_destroy_flush_work(void);
+void nf_tables_trans_destroy_flush_work(struct net *net);
int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
__be64 nf_jiffies64_to_msecs(u64 input);
@@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
struct nftables_pernet {
struct list_head tables;
struct list_head commit_list;
+ struct list_head destroy_list;
struct list_head commit_set_list;
struct list_head binding_list;
struct list_head module_list;
@@ -1915,6 +1916,7 @@ struct nftables_pernet {
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
+ struct work_struct destroy_work;
};
extern unsigned int nf_tables_net_id;
diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 5adf6fda11e8..06985530517b 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -49,7 +49,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
*
* nf_tproxy_handle_time_wait4() consumes the socket reference passed in.
*
- * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * Returns: the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
@@ -108,7 +108,7 @@ nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
*
* nf_tproxy_handle_time_wait6() consumes the socket reference passed in.
*
- * Returns the listener socket if there's one, the TIME_WAIT socket if
+ * Returns: the listener socket if there's one, the TIME_WAIT socket if
* no such listener is found, or NULL if the TCP header is incomplete.
*/
struct sock *
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 38cae7113de4..7370fba844ef 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -2,6 +2,7 @@
#ifndef _NFT_FIB_H_
#define _NFT_FIB_H_
+#include <net/l3mdev.h>
#include <net/netfilter/nf_tables.h>
struct nft_fib {
@@ -18,6 +19,35 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in)
return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
+static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sock *sk;
+
+ switch (nft_hook(pkt)) {
+ case NF_INET_PRE_ROUTING:
+ case NF_INET_INGRESS:
+ case NF_INET_LOCAL_IN:
+ break;
+ default:
+ return false;
+ }
+
+ sk = pkt->skb->sk;
+ if (sk && sk_fullsock(sk))
+ return sk->sk_rx_dst_ifindex == indev->ifindex;
+
+ return nft_fib_is_loopback(pkt->skb, indev);
+}
+
+static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt,
+ const struct net_device *iif)
+{
+ const struct net_device *dev = iif ? iif : pkt->skb->dev;
+
+ return l3mdev_master_ifindex_rcu(dev);
+}
+
int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset);
int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[]);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 39eaa6be6ca8..29e0db940382 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -118,6 +118,7 @@
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
+ * nla_put_empty_nest(skb, type) create an empty nest
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
@@ -649,7 +650,7 @@ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining)
* @nlh: netlink message header
* @remaining: number of bytes remaining in message stream
*
- * Returns the next netlink message in the message stream and
+ * Returns: the next netlink message in the message stream and
* decrements remaining by the size of the current message.
*/
static inline struct nlmsghdr *
@@ -676,7 +677,7 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining)
* exceeding maxtype will be rejected, policy must be specified, attributes
* will be validated in the strictest way possible.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse(struct nlattr **tb, int maxtype,
const struct nlattr *head, int len,
@@ -701,7 +702,7 @@ static inline int nla_parse(struct nlattr **tb, int maxtype,
* exceeding maxtype will be ignored and attributes from the policy are not
* always strictly validated (only for new attributes).
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
const struct nlattr *head, int len,
@@ -726,7 +727,7 @@ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype,
* exceeding maxtype will be rejected as well as trailing data, but the
* policy is not completely strictly validated (only for new attributes).
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype,
const struct nlattr *head,
@@ -833,7 +834,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen,
* @hdrlen: length of family specific header
* @attrtype: type of attribute to look for
*
- * Returns the first attribute which matches the specified type.
+ * Returns: the first attribute which matches the specified type.
*/
static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
int hdrlen, int attrtype)
@@ -854,7 +855,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
* specified policy. Validation is done in liberal mode.
* See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_validate_deprecated(const struct nlattr *head, int len,
int maxtype,
@@ -877,7 +878,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len,
* specified policy. Validation is done in strict mode.
* See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int nla_validate(const struct nlattr *head, int len, int maxtype,
const struct nla_policy *policy,
@@ -914,7 +915,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
* nlmsg_report - need to report back to application?
* @nlh: netlink message header
*
- * Returns 1 if a report back to the application is requested.
+ * Returns: 1 if a report back to the application is requested.
*/
static inline int nlmsg_report(const struct nlmsghdr *nlh)
{
@@ -925,7 +926,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh)
* nlmsg_seq - return the seq number of netlink message
* @nlh: netlink message header
*
- * Returns 0 if netlink message is NULL
+ * Returns: 0 if netlink message is NULL
*/
static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
{
@@ -952,7 +953,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
* @payload: length of message payload
* @flags: message flags
*
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
@@ -971,7 +972,7 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se
*
* Append data to an existing nlmsg, used when constructing a message
* with multiple fixed-format headers (which is rare).
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the extra payload.
*/
static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
@@ -993,7 +994,7 @@ static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
* @payload: length of message payload
* @flags: message flags
*
- * Returns NULL if the tailroom of the skb is insufficient to store
+ * Returns: NULL if the tailroom of the skb is insufficient to store
* the message header and payload.
*/
static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
@@ -1050,7 +1051,7 @@ static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
* nlmsg_get_pos - return current position in netlink message
* @skb: socket buffer the message is stored in
*
- * Returns a pointer to the current tail of the message.
+ * Returns: a pointer to the current tail of the message.
*/
static inline void *nlmsg_get_pos(struct sk_buff *skb)
{
@@ -1276,7 +1277,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining)
* @nla: netlink attribute
* @remaining: number of bytes remaining in attribute stream
*
- * Returns the next netlink attribute in the attribute stream and
+ * Returns: the next netlink attribute in the attribute stream and
* decrements remaining by the size of the current attribute.
*/
static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
@@ -1292,7 +1293,7 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
* @nla: attribute containing the nested attributes
* @attrtype: type of attribute to look for
*
- * Returns the first attribute which matches the specified type.
+ * Returns: the first attribute which matches the specified type.
*/
static inline struct nlattr *
nla_find_nested(const struct nlattr *nla, int attrtype)
@@ -2091,7 +2092,7 @@ static inline int nla_get_flag(const struct nlattr *nla)
* nla_get_msecs - return payload of msecs attribute
* @nla: msecs netlink attribute
*
- * Returns the number of milliseconds in jiffies.
+ * Returns: the number of milliseconds in jiffies.
*/
static inline unsigned long nla_get_msecs(const struct nlattr *nla)
{
@@ -2183,7 +2184,7 @@ static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp)
* marked their nest attributes with NLA_F_NESTED flag. New APIs should use
* nla_nest_start() which sets the flag.
*
- * Returns the container attribute or NULL on error
+ * Returns: the container attribute or NULL on error
*/
static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb,
int attrtype)
@@ -2204,7 +2205,7 @@ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb,
* Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED
* flag. This is the preferred function to use in new code.
*
- * Returns the container attribute or NULL on error
+ * Returns: the container attribute or NULL on error
*/
static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
{
@@ -2219,7 +2220,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
* Corrects the container attribute header to include the all
* appended attributes.
*
- * Returns the total data length of the skb.
+ * Returns: the total data length of the skb.
*/
static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
{
@@ -2241,6 +2242,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
}
/**
+ * nla_put_empty_nest - Create an empty nest
+ * @skb: socket buffer the message is stored in
+ * @attrtype: attribute type of the container
+ *
+ * This function is a helper for creating empty nests.
+ *
+ * Returns: 0 when successful or -EMSGSIZE on failure.
+ */
+static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype)
+{
+ return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE;
+}
+
+/**
* __nla_validate_nested - Validate a stream of nested attributes
* @start: container attribute
* @maxtype: maximum attribute type to be expected
@@ -2252,7 +2267,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
* specified policy. Attributes with a type exceeding maxtype will be
* ignored. See documentation of struct nla_policy for more details.
*
- * Returns 0 on success or a negative error code.
+ * Returns: 0 on success or a negative error code.
*/
static inline int __nla_validate_nested(const struct nlattr *start, int maxtype,
const struct nla_policy *policy,
@@ -2285,7 +2300,7 @@ nla_validate_nested_deprecated(const struct nlattr *start, int maxtype,
* nla_need_padding_for_64bit - test 64-bit alignment of the next attribute
* @skb: socket buffer the message is stored in
*
- * Return true if padding is needed to align the next attribute (nla_data()) to
+ * Return: true if padding is needed to align the next attribute (nla_data()) to
* a 64-bit aligned area.
*/
static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
@@ -2312,7 +2327,7 @@ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb)
* This will only be done in architectures which do not have
* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined.
*
- * Returns zero on success or a negative error code.
+ * Returns: zero on success or a negative error code.
*/
static inline int nla_align_64bit(struct sk_buff *skb, int padattr)
{
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 8a6e20be4b9d..c61d5b21e7b4 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -24,11 +24,20 @@ struct net_iov {
unsigned long __unused_padding;
unsigned long pp_magic;
struct page_pool *pp;
- struct dmabuf_genpool_chunk_owner *owner;
+ struct net_iov_area *owner;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
};
+struct net_iov_area {
+ /* Array of net_iovs for this area. */
+ struct net_iov *niovs;
+ size_t num_niovs;
+
+ /* Offset into the dma-buf where this chunk starts. */
+ unsigned long base_virtual;
+};
+
/* These fields in struct page are used by the page_pool and net stack:
*
* struct {
@@ -54,6 +63,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
#undef NET_IOV_ASSERT_OFFSET
+static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov)
+{
+ return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+ return niov - net_iov_owner(niov)->niovs;
+}
+
/* netmem */
/**
@@ -72,6 +91,22 @@ static inline bool netmem_is_net_iov(const netmem_ref netmem)
return (__force unsigned long)netmem & NET_IOV;
}
+/**
+ * __netmem_to_page - unsafely get pointer to the &page backing @netmem
+ * @netmem: netmem reference to convert
+ *
+ * Unsafe version of netmem_to_page(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (no check for the LSB, no WARN). When @netmem points to IOV,
+ * provokes undefined behaviour.
+ *
+ * Return: pointer to the &page (garbage if @netmem is not page-backed).
+ */
+static inline struct page *__netmem_to_page(netmem_ref netmem)
+{
+ return (__force struct page *)netmem;
+}
+
/* This conversion fails (returns NULL) if the netmem_ref is not struct page
* backed.
*/
@@ -80,7 +115,7 @@ static inline struct page *netmem_to_page(netmem_ref netmem)
if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
return NULL;
- return (__force struct page *)netmem;
+ return __netmem_to_page(netmem);
}
static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem)
@@ -103,6 +138,17 @@ static inline netmem_ref page_to_netmem(struct page *page)
return (__force netmem_ref)page;
}
+/**
+ * virt_to_netmem - convert virtual memory pointer to a netmem reference
+ * @data: host memory pointer to convert
+ *
+ * Return: netmem reference to the &page backing this virtual address.
+ */
+static inline netmem_ref virt_to_netmem(const void *data)
+{
+ return page_to_netmem(virt_to_page(data));
+}
+
static inline int netmem_ref_count(netmem_ref netmem)
{
/* The non-pp refcount of net_iov is always 1. On net_iov, we only
@@ -127,6 +173,22 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
}
+/**
+ * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
+ * @netmem: netmem reference to get the pointer from
+ *
+ * Unsafe version of netmem_get_pp(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (avoids clearing the LSB). When @netmem points to IOV,
+ * provokes invalid memory access.
+ *
+ * Return: pointer to the &page_pool (garbage if @netmem is not page-backed).
+ */
+static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
+{
+ return __netmem_to_page(netmem)->pp;
+}
+
static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
{
return __netmem_clear_lsb(netmem)->pp;
@@ -158,12 +220,43 @@ static inline netmem_ref netmem_compound_head(netmem_ref netmem)
return page_to_netmem(compound_head(netmem_to_page(netmem)));
}
+/**
+ * __netmem_address - unsafely get pointer to the memory backing @netmem
+ * @netmem: netmem reference to get the pointer for
+ *
+ * Unsafe version of netmem_address(). When @netmem is always page-backed,
+ * e.g. when it's a header buffer, performs faster and generates smaller
+ * object code (no check for the LSB). When @netmem points to IOV, provokes
+ * undefined behaviour.
+ *
+ * Return: pointer to the memory (garbage if @netmem is not page-backed).
+ */
+static inline void *__netmem_address(netmem_ref netmem)
+{
+ return page_address(__netmem_to_page(netmem));
+}
+
static inline void *netmem_address(netmem_ref netmem)
{
if (netmem_is_net_iov(netmem))
return NULL;
- return page_address(netmem_to_page(netmem));
+ return __netmem_address(netmem);
+}
+
+/**
+ * netmem_is_pfmemalloc - check if @netmem was allocated under memory pressure
+ * @netmem: netmem reference to check
+ *
+ * Return: true if @netmem is page-backed and the page was allocated under
+ * memory pressure, false otherwise.
+ */
+static inline bool netmem_is_pfmemalloc(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return false;
+
+ return page_is_pfmemalloc(netmem_to_page(netmem));
}
static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 3c014170e001..650b2dc9199f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -111,6 +111,9 @@ struct netns_ipv4 {
#endif
struct hlist_head *fib_table_hash;
struct sock *fibnl;
+ struct hlist_head *fib_info_hash;
+ unsigned int fib_info_hash_bits;
+ unsigned int fib_info_cnt;
struct sock *mc_autojoin_sk;
@@ -175,11 +178,13 @@ struct netns_ipv4 {
u8 sysctl_tcp_retries2;
u8 sysctl_tcp_orphan_retries;
u8 sysctl_tcp_tw_reuse;
+ unsigned int sysctl_tcp_tw_reuse_delay;
int sysctl_tcp_fin_timeout;
u8 sysctl_tcp_sack;
u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps;
int sysctl_tcp_rto_min_us;
+ int sysctl_tcp_rto_max_ms;
u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle;
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 60a5347922be..582a3d00cbe2 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -104,8 +104,7 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
*
* Get a page fragment from the page allocator or page_pool caches.
*
- * Return:
- * Return allocated page fragment, otherwise return NULL.
+ * Return: allocated page fragment, otherwise return NULL.
*/
static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
unsigned int *offset,
@@ -116,22 +115,22 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
return page_pool_alloc_frag(pool, offset, size, gfp);
}
-static inline struct page *page_pool_alloc(struct page_pool *pool,
- unsigned int *offset,
- unsigned int *size, gfp_t gfp)
+static inline netmem_ref page_pool_alloc_netmem(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size, gfp_t gfp)
{
unsigned int max_size = PAGE_SIZE << pool->p.order;
- struct page *page;
+ netmem_ref netmem;
if ((*size << 1) > max_size) {
*size = max_size;
*offset = 0;
- return page_pool_alloc_pages(pool, gfp);
+ return page_pool_alloc_netmems(pool, gfp);
}
- page = page_pool_alloc_frag(pool, offset, *size, gfp);
- if (unlikely(!page))
- return NULL;
+ netmem = page_pool_alloc_frag_netmem(pool, offset, *size, gfp);
+ if (unlikely(!netmem))
+ return 0;
/* There is very likely not enough space for another fragment, so append
* the remaining size to the current fragment to avoid truesize
@@ -142,7 +141,23 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
pool->frag_offset = max_size;
}
- return page;
+ return netmem;
+}
+
+static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size)
+{
+ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+
+ return page_pool_alloc_netmem(pool, offset, size, gfp);
+}
+
+static inline struct page *page_pool_alloc(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size, gfp_t gfp)
+{
+ return netmem_to_page(page_pool_alloc_netmem(pool, offset, size, gfp));
}
/**
@@ -155,8 +170,7 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
* depending on the requested size in order to allocate memory with least memory
* utilization and performance penalty.
*
- * Return:
- * Return allocated page or page fragment, otherwise return NULL.
+ * Return: allocated page or page fragment, otherwise return NULL.
*/
static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
unsigned int *offset,
@@ -190,8 +204,7 @@ static inline void *page_pool_alloc_va(struct page_pool *pool,
* This is just a thin wrapper around the page_pool_alloc() API, and
* it returns va of the allocated page or page fragment.
*
- * Return:
- * Return the va for the allocated page or page fragment, otherwise return NULL.
+ * Return: the va for the allocated page or page fragment, otherwise return NULL.
*/
static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
unsigned int *size)
@@ -302,7 +315,7 @@ static inline void page_pool_ref_page(struct page *page)
page_pool_ref_netmem(page_to_netmem(page));
}
-static inline bool page_pool_is_last_ref(netmem_ref netmem)
+static inline bool page_pool_unref_and_test(netmem_ref netmem)
{
/* If page_pool_unref_page() returns 0, we were the last user */
return page_pool_unref_netmem(netmem, 1) == 0;
@@ -317,7 +330,7 @@ static inline void page_pool_put_netmem(struct page_pool *pool,
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_ref(netmem))
+ if (!page_pool_unref_and_test(netmem))
return;
page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct);
@@ -418,7 +431,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
*/
static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
{
- return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+ ret <<= PAGE_SHIFT;
+
+ return ret;
+}
+
+static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool,
+ const dma_addr_t dma_addr,
+ u32 offset, u32 dma_sync_size)
+{
+ dma_sync_single_range_for_cpu(pool->p.dev, dma_addr,
+ offset + pool->p.offset, dma_sync_size,
+ page_pool_get_dma_dir(pool));
}
/**
@@ -437,10 +464,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
const struct page *page,
u32 offset, u32 dma_sync_size)
{
- dma_sync_single_range_for_cpu(pool->p.dev,
- page_pool_get_dma_addr(page),
- offset + pool->p.offset, dma_sync_size,
- page_pool_get_dma_dir(pool));
+ __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset,
+ dma_sync_size);
+}
+
+static inline void
+page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool,
+ const netmem_ref netmem, u32 offset,
+ u32 dma_sync_size)
+{
+ if (!pool->dma_sync_for_cpu)
+ return;
+
+ __page_pool_dma_sync_for_cpu(pool,
+ page_pool_get_dma_addr_netmem(netmem),
+ offset, dma_sync_size);
}
static inline bool page_pool_put(struct page_pool *pool)
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
new file mode 100644
index 000000000000..ada4f968960a
--- /dev/null
+++ b/include/net/page_pool/memory_provider.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_PAGE_POOL_MEMORY_PROVIDER_H
+#define _NET_PAGE_POOL_MEMORY_PROVIDER_H
+
+#include <net/netmem.h>
+#include <net/page_pool/types.h>
+
+struct netdev_rx_queue;
+struct netlink_ext_ack;
+struct sk_buff;
+
+struct memory_provider_ops {
+ netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp);
+ bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem);
+ int (*init)(struct page_pool *pool);
+ void (*destroy)(struct page_pool *pool);
+ int (*nl_fill)(void *mp_priv, struct sk_buff *rsp,
+ struct netdev_rx_queue *rxq);
+ void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq);
+};
+
+bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
+void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
+void net_mp_niov_clear_page_pool(struct net_iov *niov);
+
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p);
+int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *p,
+ struct netlink_ext_ack *extack);
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p);
+void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *old_p);
+
+/**
+ * net_mp_netmem_place_in_cache() - give a netmem to a page pool
+ * @pool: the page pool to place the netmem into
+ * @netmem: netmem to give
+ *
+ * Push an accounted netmem into the page pool's allocation cache. The caller
+ * must ensure that there is space in the cache. It should only be called off
+ * the mp_ops->alloc_netmems() path.
+ */
+static inline void net_mp_netmem_place_in_cache(struct page_pool *pool,
+ netmem_ref netmem)
+{
+ pool->alloc.cache[pool->alloc.count++] = netmem;
+}
+
+#endif
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 386efddd2aac..431b593de709 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -6,6 +6,7 @@
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
+#include <linux/xarray.h>
#include <net/netmem.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
@@ -33,6 +34,9 @@
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)
+/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
+#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)
+
/*
* Fast allocation side cache array/stack
*
@@ -152,8 +156,11 @@ struct page_pool_stats {
*/
#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
+struct memory_provider_ops;
+
struct pp_memory_provider_params {
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
};
struct page_pool {
@@ -164,7 +171,8 @@ struct page_pool {
bool has_init_callback:1; /* slow::init_callback is set */
bool dma_map:1; /* Perform DMA mapping */
- bool dma_sync:1; /* Perform DMA sync */
+ bool dma_sync:1; /* Perform DMA sync for device */
+ bool dma_sync_for_cpu:1; /* Perform DMA sync for cpu */
#ifdef CONFIG_PAGE_POOL_STATS
bool system:1; /* This is a global percpu pool */
#endif
@@ -215,6 +223,9 @@ struct page_pool {
struct ptr_ring ring;
void *mp_priv;
+ const struct memory_provider_ops *mp_ops;
+
+ struct xarray dma_mapped;
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
@@ -241,7 +252,7 @@ struct page_pool {
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
-netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp);
+netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
@@ -258,8 +269,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
const struct xdp_mem_info *mem);
-void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count);
+void page_pool_put_netmem_bulk(netmem_ref *data, u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
@@ -271,8 +281,7 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool,
{
}
-static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count)
+static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
}
#endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4eb0ebb9e76c..c64fd896b1f9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -319,7 +319,7 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts,
* tcf_exts_has_actions - check if at least one action is present
* @exts: tc filter extensions handle
*
- * Returns true if at least one action is present.
+ * Returns: true if at least one action is present.
*/
static inline bool tcf_exts_has_actions(struct tcf_exts *exts)
{
@@ -501,7 +501,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
* through all ematches respecting their logic relations returning
* as soon as the result is obvious.
*
- * Returns 1 if the ematch tree as-one matches, no ematches are configured
+ * Returns: 1 if the ematch tree as-one matches, no ematches are configured
* or ematch is not enabled in the kernel, otherwise 0 is returned.
*/
static inline int tcf_em_tree_match(struct sk_buff *skb,
diff --git a/include/net/route.h b/include/net/route.h
index 64949854d35d..c605fd5ec0c0 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -28,6 +28,7 @@
#include <net/arp.h>
#include <net/ndisc.h>
#include <net/inet_dscp.h>
+#include <net/sock.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -129,6 +130,33 @@ struct in_device;
int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);
+
+static inline void inet_sk_init_flowi4(const struct inet_sock *inet,
+ struct flowi4 *fl4)
+{
+ const struct ip_options_rcu *ip4_opt;
+ const struct sock *sk;
+ __be32 daddr;
+
+ rcu_read_lock();
+ ip4_opt = rcu_dereference(inet->inet_opt);
+
+ /* Source routing option overrides the socket destination address */
+ if (ip4_opt && ip4_opt->opt.srr)
+ daddr = ip4_opt->opt.faddr;
+ else
+ daddr = inet->inet_daddr;
+ rcu_read_unlock();
+
+ sk = &inet->sk;
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
+ ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
+ sk->sk_protocol, inet_sk_flowi_flags(sk), daddr,
+ inet->inet_saddr, inet->inet_dport,
+ inet->inet_sport, sk->sk_uid);
+ security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
+}
+
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
const struct sk_buff *skb);
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp,
@@ -185,20 +213,6 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
return ip_route_output_flow(net, fl4, sk);
}
-static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 *fl4,
- __be32 daddr, __be32 saddr,
- __be32 gre_key, __u8 tos, int oif)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = IPPROTO_GRE;
- fl4->fl4_gre_key = gre_key;
- return ip_route_output_key(net, fl4);
-}
-
enum skb_drop_reason
ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
diff --git a/include/net/rps.h b/include/net/rps.h
index a93401d23d66..e358e9711f27 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -39,7 +39,7 @@ struct rps_dev_flow {
* The rps_dev_flow_table structure contains a table of flow mappings.
*/
struct rps_dev_flow_table {
- unsigned int mask;
+ u8 log;
struct rcu_head rcu;
struct rps_dev_flow flows[];
};
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index bc0069a8b6ea..ec65a8cebb99 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -70,6 +70,40 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
}
/**
+ * struct rtnl_newlink_params - parameters of rtnl_link_ops::newlink()
+ *
+ * @src_net: Source netns of rtnetlink socket
+ * @link_net: Link netns by IFLA_LINK_NETNSID, NULL if not specified
+ * @peer_net: Peer netns
+ * @tb: IFLA_* attributes
+ * @data: IFLA_INFO_DATA attributes
+ */
+struct rtnl_newlink_params {
+ struct net *src_net;
+ struct net *link_net;
+ struct net *peer_net;
+ struct nlattr **tb;
+ struct nlattr **data;
+};
+
+/* Get effective link netns from newlink params. Generally, this is link_net
+ * and falls back to src_net. But for compatibility, a driver may * choose to
+ * use dev_net(dev) instead.
+ */
+static inline struct net *rtnl_newlink_link_net(struct rtnl_newlink_params *p)
+{
+ return p->link_net ? : p->src_net;
+}
+
+/* Get peer netns from newlink params. Fallback to link netns if peer netns is
+ * not specified explicitly.
+ */
+static inline struct net *rtnl_newlink_peer_net(struct rtnl_newlink_params *p)
+{
+ return p->peer_net ? : rtnl_newlink_link_net(p);
+}
+
+/**
* struct rtnl_link_ops - rtnetlink link operations
*
* @list: Used internally, protected by link_ops_mutex and SRCU
@@ -125,10 +159,8 @@ struct rtnl_link_ops {
struct nlattr *data[],
struct netlink_ext_ack *extack);
- int (*newlink)(struct net *src_net,
- struct net_device *dev,
- struct nlattr *tb[],
- struct nlattr *data[],
+ int (*newlink)(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack);
int (*changelink)(struct net_device *dev,
struct nlattr *tb[],
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 24e48af7e8f7..1c05fed05f2b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
return skb;
}
+static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue(&sch->gso_skb);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+ if (direct)
+ return __qdisc_dequeue_head(&sch->q);
+ else
+ return sch->dequeue(sch);
+}
+
static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
@@ -1244,6 +1259,14 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_DROP;
}
+static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free,
+ enum skb_drop_reason reason)
+{
+ tcf_set_drop_reason(skb, reason);
+ return qdisc_drop(skb, sch, to_free);
+}
+
static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
diff --git a/include/net/scm.h b/include/net/scm.h
index 0d35c7c77a74..22bb49589fde 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -105,16 +105,16 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
#ifdef CONFIG_SECURITY_NETWORK
static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
{
- char *secdata;
- u32 seclen;
+ struct lsm_context ctx;
int err;
if (test_bit(SOCK_PASSSEC, &sock->flags)) {
- err = security_secid_to_secctx(scm->secid, &secdata, &seclen);
+ err = security_secid_to_secctx(scm->secid, &ctx);
- if (!err) {
- put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, seclen, secdata);
- security_release_secctx(secdata, seclen);
+ if (err >= 0) {
+ put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, ctx.len,
+ ctx.context);
+ security_release_secctx(&ctx);
}
}
}
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index f514a0aa849e..291465c25810 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -30,17 +30,14 @@
static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum)
{
- /* This uses the crypto implementation of crc32c, which is either
- * implemented w/ hardware support or resolves to __crc32c_le().
- */
return (__force __wsum)crc32c((__force __u32)sum, buff, len);
}
static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
int offset, int len)
{
- return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
- (__force __u32)csum2, len);
+ return (__force __wsum)crc32c_combine((__force __u32)csum,
+ (__force __u32)csum2, len);
}
static const struct skb_checksum_ops sctp_csum_ops = {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 84e6b9fd5610..d8da764cf6de 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -636,7 +636,7 @@ static inline void sctp_transport_pl_reset(struct sctp_transport *t)
}
} else {
if (t->pl.state != SCTP_PL_DISABLED) {
- if (del_timer(&t->probe_timer))
+ if (timer_delete(&t->probe_timer))
sctp_transport_put(t);
t->pl.state = SCTP_PL_DISABLED;
}
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 31248cfdfb23..dcd288fa1bb6 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -775,6 +775,7 @@ struct sctp_transport {
/* Reference counting. */
refcount_t refcnt;
+ __u32 dead:1,
/* RTO-Pending : A flag used to track if one of the DATA
* chunks sent to this address is currently being
* used to compute a RTT. If this flag is 0,
@@ -784,7 +785,7 @@ struct sctp_transport {
* calculation completes (i.e. the DATA chunk
* is SACK'd) clear this flag.
*/
- __u32 rto_pending:1,
+ rto_pending:1,
/*
* hb_sent : a flag that signals that we have a pending
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 468a67836e2f..4cb4326dfebe 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -159,7 +159,7 @@ struct linux_tls_mib {
#define __SNMP_ADD_STATS64(mib, field, addend) \
do { \
- __typeof__(*mib) *ptr = raw_cpu_ptr(mib); \
+ TYPEOF_UNQUAL(*mib) *ptr = raw_cpu_ptr(mib); \
u64_stats_update_begin(&ptr->syncp); \
ptr->mibs[field] += addend; \
u64_stats_update_end(&ptr->syncp); \
@@ -176,8 +176,7 @@ struct linux_tls_mib {
#define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
#define __SNMP_UPD_PO_STATS64(mib, basefield, addend) \
do { \
- __typeof__(*mib) *ptr; \
- ptr = raw_cpu_ptr((mib)); \
+ TYPEOF_UNQUAL(*mib) *ptr = raw_cpu_ptr(mib); \
u64_stats_update_begin(&ptr->syncp); \
ptr->mibs[basefield##PKTS]++; \
ptr->mibs[basefield##OCTETS] += addend; \
diff --git a/include/net/sock.h b/include/net/sock.h
index d3efb581c2ff..99470c6d24de 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -303,6 +303,7 @@ struct sk_filter;
* @sk_stamp: time stamp of last packet received
* @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
* @sk_tsflags: SO_TIMESTAMPING flags
+ * @sk_bpf_cb_flags: used in bpf_setsockopt()
* @sk_use_task_frag: allow sk_page_frag() to use current->task_frag.
* Sockets that can be used under memory reclaim should
* set this to false.
@@ -338,6 +339,8 @@ struct sk_filter;
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
* @sk_user_frags: xarray of pages the user is holding a reference on.
+ * @sk_owner: reference to the real owner of the socket that calls
+ * sock_lock_init_class_and_name().
*/
struct sock {
/*
@@ -525,6 +528,8 @@ struct sock {
u8 sk_txtime_deadline_mode : 1,
sk_txtime_report_errors : 1,
sk_txtime_unused : 6;
+#define SK_BPF_CB_FLAG_TEST(SK, FLAG) ((SK)->sk_bpf_cb_flags & (FLAG))
+ u8 sk_bpf_cb_flags;
void *sk_user_data;
#ifdef CONFIG_SECURITY
@@ -544,6 +549,10 @@ struct sock {
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
struct xarray sk_user_frags;
+
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+ struct module *sk_owner;
+#endif
};
struct sock_bh_locked {
@@ -953,6 +962,8 @@ enum sock_flags {
SOCK_XDP, /* XDP is attached */
SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */
+ SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */
+ SOCK_TIMESTAMPING_ANY, /* Copy of sk_tsflags & TSFLAGS_ANY */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1283,10 +1294,6 @@ struct proto {
unsigned int inuse_idx;
#endif
-#if IS_ENABLED(CONFIG_MPTCP)
- int (*forward_alloc_get)(const struct sock *sk);
-#endif
-
bool (*stream_memory_free)(const struct sock *sk, int wake);
bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
@@ -1347,15 +1354,6 @@ int sock_load_diag_module(int family, int protocol);
INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
-static inline int sk_forward_alloc_get(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_MPTCP)
- if (sk->sk_prot->forward_alloc_get)
- return sk->sk_prot->forward_alloc_get(sk);
-#endif
- return READ_ONCE(sk->sk_forward_alloc);
-}
-
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
@@ -1591,6 +1589,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
sk_mem_reclaim(sk);
}
+#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+ __module_get(owner);
+ sk->sk_owner = owner;
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+ sk->sk_owner = NULL;
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+ module_put(sk->sk_owner);
+}
+#else
+static inline void sk_owner_set(struct sock *sk, struct module *owner)
+{
+}
+
+static inline void sk_owner_clear(struct sock *sk)
+{
+}
+
+static inline void sk_owner_put(struct sock *sk)
+{
+}
+#endif
/*
* Macro so as to not evaluate some arguments when
* lockdep is not enabled.
@@ -1600,13 +1627,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
*/
#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \
do { \
+ sk_owner_set(sk, THIS_MODULE); \
sk->sk_lock.owned = 0; \
init_waitqueue_head(&sk->sk_lock.wq); \
spin_lock_init(&(sk)->sk_lock.slock); \
debug_check_no_locks_freed((void *)&(sk)->sk_lock, \
- sizeof((sk)->sk_lock)); \
+ sizeof((sk)->sk_lock)); \
lockdep_set_class_and_name(&(sk)->sk_lock.slock, \
- (skey), (sname)); \
+ (skey), (sname)); \
lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
} while (0)
@@ -1805,6 +1833,8 @@ static inline struct sk_buff *sock_alloc_send_skb(struct sock *sk,
}
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority);
+void *sock_kmemdup(struct sock *sk, const void *src,
+ int size, gfp_t priority);
void sock_kfree_s(struct sock *sk, void *mem, int size);
void sock_kzfree_s(struct sock *sk, void *mem, int size);
void sk_send_sigurg(struct sock *sk);
@@ -1821,13 +1851,16 @@ struct sockcm_cookie {
u32 mark;
u32 tsflags;
u32 ts_opt_id;
+ u32 priority;
};
static inline void sockcm_init(struct sockcm_cookie *sockc,
const struct sock *sk)
{
*sockc = (struct sockcm_cookie) {
- .tsflags = READ_ONCE(sk->sk_tsflags)
+ .mark = READ_ONCE(sk->sk_mark),
+ .tsflags = READ_ONCE(sk->sk_tsflags),
+ .priority = READ_ONCE(sk->sk_priority),
};
}
@@ -2662,12 +2695,13 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
{
#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_RCVTSTAMP) | \
- (1UL << SOCK_RCVMARK))
+ (1UL << SOCK_RCVMARK) | \
+ (1UL << SOCK_RCVPRIORITY) | \
+ (1UL << SOCK_TIMESTAMPING_ANY))
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_RECV_CMSGS ||
- READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+ if (READ_ONCE(sk->sk_flags) & FLAGS_RECV_CMSGS)
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
@@ -2917,6 +2951,13 @@ int sock_set_timestamping(struct sock *sk, int optname,
struct so_timestamping timestamping);
void sock_enable_timestamps(struct sock *sk);
+#if defined(CONFIG_CGROUP_BPF)
+void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op);
+#else
+static inline void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op)
+{
+}
+#endif
void sock_no_linger(struct sock *sk);
void sock_set_keepalive(struct sock *sk);
void sock_set_priority(struct sock *sk, u32 priority);
@@ -2937,8 +2978,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
static inline bool sk_is_readable(struct sock *sk)
{
- if (sk->sk_prot->sock_is_readable)
- return sk->sk_prot->sock_is_readable(sk);
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot->sock_is_readable)
+ return prot->sock_is_readable(sk);
+
return false;
}
#endif /* _SOCK_H */
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 41e2ce9e9e10..0a83010b3a64 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -43,6 +43,8 @@ struct strparser;
struct strp_callbacks {
int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+ int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
int (*read_sock_done)(struct strparser *strp, int err);
void (*abort_parser)(struct strparser *strp, int err);
void (*lock)(struct strparser *strp);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bc04599547c3..4450c384ef17 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -26,6 +26,7 @@
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/bits.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -144,8 +145,9 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
#define TCP_DELACK_MIN 4U
#define TCP_ATO_MIN 4U
#endif
-#define TCP_RTO_MAX ((unsigned)(120*HZ))
-#define TCP_RTO_MIN ((unsigned)(HZ/5))
+#define TCP_RTO_MAX_SEC 120
+#define TCP_RTO_MAX ((unsigned)(TCP_RTO_MAX_SEC * HZ))
+#define TCP_RTO_MIN ((unsigned)(HZ / 5))
#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
@@ -372,16 +374,53 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
}
}
-#define TCP_ECN_OK 1
-#define TCP_ECN_QUEUE_CWR 2
-#define TCP_ECN_DEMAND_CWR 4
-#define TCP_ECN_SEEN 8
+#define TCP_ECN_MODE_RFC3168 BIT(0)
+#define TCP_ECN_QUEUE_CWR BIT(1)
+#define TCP_ECN_DEMAND_CWR BIT(2)
+#define TCP_ECN_SEEN BIT(3)
+#define TCP_ECN_MODE_ACCECN BIT(4)
+
+#define TCP_ECN_DISABLED 0
+#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
+#define TCP_ECN_MODE_ANY (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
+
+static inline bool tcp_ecn_mode_any(const struct tcp_sock *tp)
+{
+ return tp->ecn_flags & TCP_ECN_MODE_ANY;
+}
+
+static inline bool tcp_ecn_mode_rfc3168(const struct tcp_sock *tp)
+{
+ return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_RFC3168;
+}
+
+static inline bool tcp_ecn_mode_accecn(const struct tcp_sock *tp)
+{
+ return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_ACCECN;
+}
+
+static inline bool tcp_ecn_disabled(const struct tcp_sock *tp)
+{
+ return !tcp_ecn_mode_any(tp);
+}
+
+static inline bool tcp_ecn_mode_pending(const struct tcp_sock *tp)
+{
+ return (tp->ecn_flags & TCP_ECN_MODE_PENDING) == TCP_ECN_MODE_PENDING;
+}
+
+static inline void tcp_ecn_mode_set(struct tcp_sock *tp, u8 mode)
+{
+ tp->ecn_flags &= ~TCP_ECN_MODE_ANY;
+ tp->ecn_flags |= mode;
+}
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
TCP_TW_RST = 1,
TCP_TW_ACK = 2,
- TCP_TW_SYN = 3
+ TCP_TW_SYN = 3,
+ TCP_TW_ACK_OOW = 4
};
@@ -391,7 +430,7 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
u32 *tw_isn);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
- bool *lost_race);
+ bool *lost_race, enum skb_drop_reason *drop_reason);
enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
@@ -416,6 +455,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
+void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
@@ -667,7 +707,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
enum sk_rst_reason reason);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
-void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
@@ -743,6 +783,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
void tcp_read_done(struct sock *sk, size_t len);
@@ -753,10 +796,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
+static inline unsigned int tcp_rto_max(const struct sock *sk)
+{
+ return READ_ONCE(inet_csk(sk)->icsk_rto_max);
+}
+
static inline void tcp_bound_rto(struct sock *sk)
{
- if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
- inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+ inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk));
}
static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
@@ -797,7 +844,7 @@ u32 tcp_delack_max(const struct sock *sk);
static inline u32 tcp_rto_min(const struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = inet_csk(sk)->icsk_rto_min;
+ u32 rto_min = READ_ONCE(inet_csk(sk)->icsk_rto_min);
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
@@ -925,15 +972,22 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
-#define TCPHDR_FIN 0x01
-#define TCPHDR_SYN 0x02
-#define TCPHDR_RST 0x04
-#define TCPHDR_PSH 0x08
-#define TCPHDR_ACK 0x10
-#define TCPHDR_URG 0x20
-#define TCPHDR_ECE 0x40
-#define TCPHDR_CWR 0x80
-
+#define TCPHDR_FIN BIT(0)
+#define TCPHDR_SYN BIT(1)
+#define TCPHDR_RST BIT(2)
+#define TCPHDR_PSH BIT(3)
+#define TCPHDR_ACK BIT(4)
+#define TCPHDR_URG BIT(5)
+#define TCPHDR_ECE BIT(6)
+#define TCPHDR_CWR BIT(7)
+#define TCPHDR_AE BIT(8)
+#define TCPHDR_FLAGS_MASK (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+ TCPHDR_PSH | TCPHDR_ACK | TCPHDR_URG | \
+ TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
+#define tcp_flags_ntohs(th) (ntohs(*(__be16 *)&tcp_flag_word(th)) & \
+ TCPHDR_FLAGS_MASK)
+
+#define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
/* State flags for sacked in struct tcp_skb_cb */
@@ -968,14 +1022,16 @@ struct tcp_skb_cb {
u16 tcp_gso_size;
};
};
- __u8 tcp_flags; /* TCP header flags. (tcp[13]) */
+ __u16 tcp_flags; /* TCP header flags (tcp[12-13])*/
__u8 sacked; /* State flags for SACK. */
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
- __u8 txstamp_ack:1, /* Record TX timestamp for ack? */
+#define TSTAMP_ACK_SK 0x1
+#define TSTAMP_ACK_BPF 0x2
+ __u8 txstamp_ack:2, /* Record TX timestamp for ack? */
eor:1, /* Is skb MSG_EOR marked? */
has_rxtstamp:1, /* SKB has a RX timestamp */
- unused:5;
+ unused:4;
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
@@ -1121,9 +1177,9 @@ enum tcp_ca_ack_event_flags {
#define TCP_CA_UNSPEC 0
/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
-#define TCP_CONG_NON_RESTRICTED 0x1
+#define TCP_CONG_NON_RESTRICTED BIT(0)
/* Requires ECN/ECT set on all packets */
-#define TCP_CONG_NEEDS_ECN 0x2
+#define TCP_CONG_NEEDS_ECN BIT(1)
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
union tcp_cc_info;
@@ -1437,10 +1493,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk)
static inline void tcp_reset_xmit_timer(struct sock *sk,
const int what,
unsigned long when,
- const unsigned long max_when)
+ bool pace_delay)
{
- inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
- max_when);
+ if (pace_delay)
+ when += tcp_pacing_delay(sk);
+ inet_csk_reset_xmit_timer(sk, what, when,
+ tcp_rto_max(sk));
}
/* Something is really bad, we could not queue an additional packet,
@@ -1469,7 +1527,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
{
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- tcp_probe0_base(sk), TCP_RTO_MAX);
+ tcp_probe0_base(sk), true);
}
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
@@ -1831,7 +1889,7 @@ int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
* @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
* @c: returned tcp_sigpool for usage (uninitialized on failure)
*
- * Returns 0 on success, error otherwise.
+ * Returns: 0 on success, error otherwise.
*/
int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
/**
@@ -2585,8 +2643,8 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
- int (*get_info)(struct sock *sk, struct sk_buff *skb);
- size_t (*get_info_size)(const struct sock *sk);
+ int (*get_info)(struct sock *sk, struct sk_buff *skb, bool net_admin);
+ size_t (*get_info_size)(const struct sock *sk, bool net_admin);
/* clone ulp */
void (*clone)(const struct request_sock *req, struct sock *newsk,
const gfp_t priority);
@@ -2613,6 +2671,11 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct strparser;
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
+#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_INET
@@ -2663,6 +2726,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
if (sk_fullsock(sk)) {
sock_ops.is_fullsock = 1;
+ sock_ops.is_locked_tcp_sock = 1;
sock_owned_by_me(sk);
}
@@ -2751,9 +2815,9 @@ extern struct static_key_false tcp_have_smc;
#endif
#if IS_ENABLED(CONFIG_TLS_DEVICE)
-void clean_acked_data_enable(struct inet_connection_sock *icsk,
+void clean_acked_data_enable(struct tcp_sock *tp,
void (*cad)(struct sock *sk, u32 ack_seq));
-void clean_acked_data_disable(struct inet_connection_sock *icsk);
+void clean_acked_data_disable(struct tcp_sock *tp);
void clean_acked_data_flush(void);
#endif
diff --git a/include/net/tls.h b/include/net/tls.h
index 61fef2880114..857340338b69 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -59,6 +59,8 @@ struct tls_rec;
#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type)
+#define TLS_HANDSHAKE_KEYUPDATE 24 /* rfc8446 B.3: Key update */
+
#define TLS_AAD_SPACE_SIZE 13
#define TLS_MAX_IV_SIZE 16
@@ -130,6 +132,7 @@ struct tls_sw_context_rx {
u8 async_capable:1;
u8 zc_capable:1;
u8 reader_contended:1;
+ bool key_update_pending;
struct tls_strparser strp;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 33ba6fc151cf..2dd23ee2bacd 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -227,6 +227,7 @@ struct vxlan_config {
unsigned int addrmax;
bool no_share;
enum ifla_vxlan_df df;
+ struct vxlanhdr reserved_bits;
};
enum {
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b5b10f2b88e5..48efacbaa35d 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -11,6 +11,8 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h> /* skb_shared_info */
+#include <net/page_pool/types.h>
+
/**
* DOC: XDP RX-queue information
*
@@ -87,7 +89,7 @@ struct xdp_buff {
u32 flags; /* supported values defined in xdp_buff_flags */
};
-static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
+static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
}
@@ -102,7 +104,8 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
}
-static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
+static __always_inline bool
+xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}
@@ -143,15 +146,16 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
static inline struct skb_shared_info *
-xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
+xdp_get_shared_info_from_buff(const struct xdp_buff *xdp)
{
return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}
-static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
+static __always_inline unsigned int
+xdp_get_buff_len(const struct xdp_buff *xdp)
{
unsigned int len = xdp->data_end - xdp->data;
- struct skb_shared_info *sinfo;
+ const struct skb_shared_info *sinfo;
if (likely(!xdp_buff_has_frags(xdp)))
goto out;
@@ -162,26 +166,114 @@ out:
return len;
}
+void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp);
+
+/**
+ * __xdp_buff_add_frag - attach frag to &xdp_buff
+ * @xdp: XDP buffer to attach the frag to
+ * @netmem: network memory containing the frag
+ * @offset: offset at which the frag starts
+ * @size: size of the frag
+ * @truesize: total memory size occupied by the frag
+ * @try_coalesce: whether to try coalescing the frags (not valid for XSk)
+ *
+ * Attach frag to the XDP buffer. If it currently has no frags attached,
+ * initialize the related fields, otherwise check that the frag number
+ * didn't reach the limit of ``MAX_SKB_FRAGS``. If possible, try coalescing
+ * the frag with the previous one.
+ * The function doesn't check/update the pfmemalloc bit. Please use the
+ * non-underscored wrapper in drivers.
+ *
+ * Return: true on success, false if there's no space for the frag in
+ * the shared info struct.
+ */
+static inline bool __xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem,
+ u32 offset, u32 size, u32 truesize,
+ bool try_coalesce)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ skb_frag_t *prev;
+ u32 nr_frags;
+
+ if (!xdp_buff_has_frags(xdp)) {
+ xdp_buff_set_frags_flag(xdp);
+
+ nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ sinfo->xdp_frags_truesize = 0;
+
+ goto fill;
+ }
+
+ nr_frags = sinfo->nr_frags;
+ prev = &sinfo->frags[nr_frags - 1];
+
+ if (try_coalesce && netmem == skb_frag_netmem(prev) &&
+ offset == skb_frag_off(prev) + skb_frag_size(prev)) {
+ skb_frag_size_add(prev, size);
+ /* Guaranteed to only decrement the refcount */
+ xdp_return_frag(netmem, xdp);
+ } else if (unlikely(nr_frags == MAX_SKB_FRAGS)) {
+ return false;
+ } else {
+fill:
+ __skb_fill_netmem_desc_noacc(sinfo, nr_frags++, netmem,
+ offset, size);
+ }
+
+ sinfo->nr_frags = nr_frags;
+ sinfo->xdp_frags_size += size;
+ sinfo->xdp_frags_truesize += truesize;
+
+ return true;
+}
+
+/**
+ * xdp_buff_add_frag - attach frag to &xdp_buff
+ * @xdp: XDP buffer to attach the frag to
+ * @netmem: network memory containing the frag
+ * @offset: offset at which the frag starts
+ * @size: size of the frag
+ * @truesize: total memory size occupied by the frag
+ *
+ * Version of __xdp_buff_add_frag() which takes care of the pfmemalloc bit.
+ *
+ * Return: true on success, false if there's no space for the frag in
+ * the shared info struct.
+ */
+static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem,
+ u32 offset, u32 size, u32 truesize)
+{
+ if (!__xdp_buff_add_frag(xdp, netmem, offset, size, truesize, true))
+ return false;
+
+ if (unlikely(netmem_is_pfmemalloc(netmem)))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+
+ return true;
+}
+
struct xdp_frame {
void *data;
- u16 len;
- u16 headroom;
+ u32 len;
+ u32 headroom;
u32 metasize; /* uses lower 8-bits */
/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
- * while mem info is valid on remote CPU.
+ * while mem_type is valid on remote CPU.
*/
- struct xdp_mem_info mem;
+ enum xdp_mem_type mem_type:32;
struct net_device *dev_rx; /* used by cpumap */
u32 frame_sz;
u32 flags; /* supported values defined in xdp_buff_flags */
};
-static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
+static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame)
{
return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
}
-static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
+static __always_inline bool
+xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame)
{
return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}
@@ -189,18 +281,16 @@ static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame
#define XDP_BULK_QUEUE_SIZE 16
struct xdp_frame_bulk {
int count;
- void *xa;
- void *q[XDP_BULK_QUEUE_SIZE];
+ netmem_ref q[XDP_BULK_QUEUE_SIZE];
};
static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
{
- /* bq->count will be zero'ed when bq->xa gets updated */
- bq->xa = NULL;
+ bq->count = 0;
}
static inline struct skb_shared_info *
-xdp_get_shared_info_from_frame(struct xdp_frame *frame)
+xdp_get_shared_info_from_frame(const struct xdp_frame *frame)
{
void *data_hard_start = frame->data - frame->headroom - sizeof(*frame);
@@ -226,7 +316,14 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
unsigned int size, unsigned int truesize,
bool pfmemalloc)
{
- skb_shinfo(skb)->nr_frags = nr_frags;
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ sinfo->nr_frags = nr_frags;
+ /*
+ * ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``,
+ * reset it after that these fields aren't used anymore.
+ */
+ sinfo->destructor_arg = NULL;
skb->len += size;
skb->data_len += size;
@@ -238,17 +335,19 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
+struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
+struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp);
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev);
struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct net_device *dev);
-int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
static inline
-void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
+void xdp_convert_frame_to_buff(const struct xdp_frame *frame,
+ struct xdp_buff *xdp)
{
xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame);
xdp->data = frame->data;
@@ -259,7 +358,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
}
static inline
-int xdp_update_frame_from_buff(struct xdp_buff *xdp,
+int xdp_update_frame_from_buff(const struct xdp_buff *xdp,
struct xdp_frame *xdp_frame)
{
int metasize, headroom;
@@ -301,24 +400,33 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
return NULL;
- /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
- xdp_frame->mem = xdp->rxq->mem;
+ /* rxq only valid until napi_schedule ends, convert to xdp_mem_type */
+ xdp_frame->mem_type = xdp->rxq->mem.type;
return xdp_frame;
}
-void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
- struct xdp_buff *xdp);
+void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
+ bool napi_direct, struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
-void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
-static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
+static inline void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
+{
+ if (unlikely(!bq->count))
+ return;
+
+ page_pool_put_netmem_bulk(bq->q, bq->count);
+ bq->count = 0;
+}
+
+static __always_inline unsigned int
+xdp_get_frame_len(const struct xdp_frame *xdpf)
{
- struct skb_shared_info *sinfo;
+ const struct skb_shared_info *sinfo;
unsigned int len = xdpf->len;
if (likely(!xdp_frame_has_frags(xdpf)))
@@ -350,6 +458,38 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
int xdp_reg_mem_model(struct xdp_mem_info *mem,
enum xdp_mem_type type, void *allocator);
void xdp_unreg_mem_model(struct xdp_mem_info *mem);
+int xdp_reg_page_pool(struct page_pool *pool);
+void xdp_unreg_page_pool(const struct page_pool *pool);
+void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
+ const struct page_pool *pool);
+
+/**
+ * xdp_rxq_info_attach_mem_model - attach registered mem info to RxQ info
+ * @xdp_rxq: XDP RxQ info to attach the memory info to
+ * @mem: already registered memory info
+ *
+ * If the driver registers its memory providers manually, it must use this
+ * function instead of xdp_rxq_info_reg_mem_model().
+ */
+static inline void
+xdp_rxq_info_attach_mem_model(struct xdp_rxq_info *xdp_rxq,
+ const struct xdp_mem_info *mem)
+{
+ xdp_rxq->mem = *mem;
+}
+
+/**
+ * xdp_rxq_info_detach_mem_model - detach registered mem info from RxQ info
+ * @xdp_rxq: XDP RxQ info to detach the memory info from
+ *
+ * If the driver registers its memory providers manually and then attaches it
+ * via xdp_rxq_info_attach_mem_model(), it must call this function before
+ * xdp_rxq_info_unreg().
+ */
+static inline void xdp_rxq_info_detach_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+ xdp_rxq->mem = (struct xdp_mem_info){ };
+}
/* Drivers not supporting XDP metadata can use this helper, which
* rejects any room expansion for metadata as a result.
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index bfe625b55d55..e8bd6ddb7b12 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -71,9 +71,6 @@ struct xdp_sock {
*/
u32 tx_budget_spent;
- /* Protects generic receive. */
- spinlock_t rx_lock;
-
/* Statistics */
u64 rx_dropped;
u64 rx_queue_full;
@@ -110,11 +107,16 @@ struct xdp_sock {
* indicates position where checksumming should start.
* csum_offset indicates position where checksum should be stored.
*
+ * void (*tmo_request_launch_time)(u64 launch_time, void *priv)
+ * Called when AF_XDP frame requested launch time HW offload support.
+ * launch_time indicates the PTP time at which the device can schedule the
+ * packet for transmission.
*/
struct xsk_tx_metadata_ops {
void (*tmo_request_timestamp)(void *priv);
u64 (*tmo_fill_timestamp)(void *priv);
void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
+ void (*tmo_request_launch_time)(u64 launch_time, void *priv);
};
#ifdef CONFIG_XDP_SOCKETS
@@ -162,6 +164,11 @@ static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
if (!meta)
return;
+ if (ops->tmo_request_launch_time)
+ if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME)
+ ops->tmo_request_launch_time(meta->request.launch_time,
+ priv);
+
if (ops->tmo_request_timestamp)
if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
ops->tmo_request_timestamp(priv);
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 7a7316d9c0da..513c8e9704f6 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -92,7 +92,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return xp_alloc(pool);
}
-static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+static inline bool xsk_is_eop_desc(const struct xdp_desc *desc)
{
return !xp_mb_desc(desc);
}
@@ -127,14 +127,24 @@ out:
xp_free(xskb);
}
-static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+static inline bool xsk_buff_add_frag(struct xdp_buff *head,
+ struct xdp_buff *xdp)
{
- struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
+ const void *data = xdp->data;
+ struct xdp_buff_xsk *frag;
+
+ if (!__xdp_buff_add_frag(head, virt_to_netmem(data),
+ offset_in_page(data), xdp->data_end - data,
+ xdp->frame_sz, false))
+ return false;
+ frag = container_of(xdp, struct xdp_buff_xsk, xdp);
list_add_tail(&frag->list_node, &frag->pool->xskb_list);
+
+ return true;
}
-static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
{
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
struct xdp_buff *ret = NULL;
@@ -186,30 +196,56 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return xp_raw_get_data(pool, addr);
}
+/**
+ * xsk_buff_raw_get_ctx - get &xdp_desc context
+ * @pool: XSk buff pool desc address belongs to
+ * @addr: desc address (from userspace)
+ *
+ * Wrapper for xp_raw_get_ctx() to be used in drivers, see its kdoc for
+ * details.
+ *
+ * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata
+ * pointer, if it is present and valid (initialized to %NULL otherwise).
+ */
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return xp_raw_get_ctx(pool, addr);
+}
+
#define XDP_TXMD_FLAGS_VALID ( \
XDP_TXMD_FLAGS_TIMESTAMP | \
XDP_TXMD_FLAGS_CHECKSUM | \
+ XDP_TXMD_FLAGS_LAUNCH_TIME | \
0)
-static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+static inline bool
+xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta)
{
return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
{
struct xsk_tx_metadata *meta;
if (!pool->tx_metadata_len)
return NULL;
- meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+ meta = data - pool->tx_metadata_len;
if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
return NULL; /* no way to signal the error to the user */
return meta;
}
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ return __xsk_buff_get_metadata(pool, xp_raw_get_data(pool, addr));
+}
+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -323,7 +359,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return NULL;
}
-static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+static inline bool xsk_is_eop_desc(const struct xdp_desc *desc)
{
return false;
}
@@ -342,11 +378,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
{
}
-static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+static inline bool xsk_buff_add_frag(struct xdp_buff *head,
+ struct xdp_buff *xdp)
{
+ return false;
}
-static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
{
return NULL;
}
@@ -375,12 +413,25 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return NULL;
}
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return (struct xdp_desc_ctx){ };
+}
+
static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
{
return false;
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
+{
+ return NULL;
+}
+
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
return NULL;
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2c4eda6a8596..1f1861c57e2a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -38,6 +38,7 @@
#define XFRM_PROTO_COMP 108
#define XFRM_PROTO_IPIP 4
#define XFRM_PROTO_IPV6 41
+#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG
#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS
@@ -146,8 +147,19 @@ enum {
};
struct xfrm_dev_offload {
+ /* The device for this offload.
+ * Device drivers should not use this directly, as that will prevent
+ * them from working with bonding device. Instead, the device passed
+ * to the add/delete callbacks should be used.
+ */
struct net_device *dev;
netdevice_tracker dev_tracker;
+ /* This is a private pointer used by the bonding driver (and eventually
+ * should be moved there). Device drivers should not use it.
+ * Protected by xfrm_state.lock AND bond.ipsec_lock in most cases,
+ * except in the .xdo_dev_state_del() flow, where only xfrm_state.lock
+ * is held.
+ */
struct net_device *real_dev;
unsigned long offload_handle;
u8 dir : 2;
@@ -213,6 +225,7 @@ struct xfrm_state {
u16 family;
xfrm_address_t saddr;
int header_len;
+ int enc_hdr_len;
int trailer_len;
u32 extra_flags;
struct xfrm_mark smark;
@@ -234,7 +247,6 @@ struct xfrm_state {
/* Data for encapsulator */
struct xfrm_encap_tmpl *encap;
- struct sock __rcu *encap_sk;
/* NAT keepalive */
u32 nat_keepalive_interval; /* seconds */
@@ -303,6 +315,9 @@ struct xfrm_state {
* interpreted by xfrm_type methods. */
void *data;
u8 dir;
+
+ const struct xfrm_mode_cbs *mode_cbs;
+ void *mode_data;
};
static inline struct net *xs_net(struct xfrm_state *x)
@@ -459,6 +474,54 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
+void xfrm_set_type_offload(struct xfrm_state *x);
+static inline void xfrm_unset_type_offload(struct xfrm_state *x)
+{
+ if (!x->type_offload)
+ return;
+
+ module_put(x->type_offload->owner);
+ x->type_offload = NULL;
+}
+
+/**
+ * struct xfrm_mode_cbs - XFRM mode callbacks
+ * @owner: module owner or NULL
+ * @init_state: Add/init mode specific state in `xfrm_state *x`
+ * @clone_state: Copy mode specific values from `orig` to new state `x`
+ * @destroy_state: Cleanup mode specific state from `xfrm_state *x`
+ * @user_init: Process mode specific netlink attributes from user
+ * @copy_to_user: Add netlink attributes to `attrs` based on state in `x`
+ * @sa_len: Return space required to store mode specific netlink attributes
+ * @get_inner_mtu: Return avail payload space after removing encap overhead
+ * @input: Process received packet from SA using mode
+ * @output: Output given packet using mode
+ * @prepare_output: Add mode specific encapsulation to packet in skb. On return
+ * `transport_header` should point at ESP header, `network_header` should
+ * point at outer IP header and `mac_header` should opint at the
+ * protocol/nexthdr field of the outer IP.
+ *
+ * One should examine and understand the specific uses of these callbacks in
+ * xfrm for further detail on how and when these functions are called. RTSL.
+ */
+struct xfrm_mode_cbs {
+ struct module *owner;
+ int (*init_state)(struct xfrm_state *x);
+ int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig);
+ void (*destroy_state)(struct xfrm_state *x);
+ int (*user_init)(struct net *net, struct xfrm_state *x,
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack);
+ int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
+ unsigned int (*sa_len)(const struct xfrm_state *x);
+ u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
+ int (*input)(struct xfrm_state *x, struct sk_buff *skb);
+ int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
+ int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
+};
+
+int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
+void xfrm_unregister_mode_cbs(u8 mode);
static inline int xfrm_af2proto(unsigned int family)
{
@@ -1716,8 +1779,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
- struct netlink_ext_ack *extack);
+int __xfrm_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
@@ -1729,6 +1791,15 @@ int xfrm_trans_queue(struct sk_buff *skb,
struct sk_buff *));
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err);
int xfrm_output(struct sock *sk, struct sk_buff *skb);
+int xfrm4_tunnel_check_size(struct sk_buff *skb);
+#if IS_ENABLED(CONFIG_IPV6)
+int xfrm6_tunnel_check_size(struct sk_buff *skb);
+#else
+static inline int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+ return -EMSGSIZE;
+}
+#endif
#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index bb03cee716b3..cac56e6b0869 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,7 +29,7 @@ struct xdp_buff_xsk {
dma_addr_t frame_dma;
struct xsk_buff_pool *pool;
struct list_head list_node;
-};
+} __aligned_largest;
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t))
@@ -53,6 +53,8 @@ struct xsk_buff_pool {
refcount_t users;
struct xdp_umem *umem;
struct work_struct work;
+ /* Protects generic receive in shared and non-shared umem mode. */
+ spinlock_t rx_lock;
struct list_head free_list;
struct list_head xskb_list;
u32 heads_cnt;
@@ -141,6 +143,14 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
+
+struct xdp_desc_ctx {
+ dma_addr_t dma;
+ struct xsk_tx_metadata *meta;
+};
+
+struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr);
+
static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
{
return xskb->dma;
@@ -183,7 +193,7 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
!(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
}
-static inline bool xp_mb_desc(struct xdp_desc *desc)
+static inline bool xp_mb_desc(const struct xdp_desc *desc)
{
return desc->options & XDP_PKT_CONTD;
}
@@ -230,8 +240,8 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb,
return orig_addr;
offset = xskb->xdp.data - xskb->xdp.data_hard_start;
- orig_addr -= offset;
offset += pool->headroom;
+ orig_addr -= offset;
return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}