summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/Space.h7
-rw-r--r--include/net/af_vsock.h2
-rw-r--r--include/net/bluetooth/bluetooth.h11
-rw-r--r--include/net/bluetooth/hci.h32
-rw-r--r--include/net/bluetooth/hci_core.h137
-rw-r--r--include/net/bluetooth/hci_sync.h5
-rw-r--r--include/net/bluetooth/mgmt.h2
-rw-r--r--include/net/bluetooth/sco.h2
-rw-r--r--include/net/bond_3ad.h1
-rw-r--r--include/net/bonding.h11
-rw-r--r--include/net/busy_poll.h1
-rw-r--r--include/net/caif/cfsrvl.h3
-rw-r--r--include/net/cfg80211.h28
-rw-r--r--include/net/datalink.h2
-rw-r--r--include/net/devlink.h35
-rw-r--r--include/net/dropreason-core.h6
-rw-r--r--include/net/dropreason.h6
-rw-r--r--include/net/dsa.h3
-rw-r--r--include/net/dst_ops.h2
-rw-r--r--include/net/flow_dissector.h14
-rw-r--r--include/net/flow_offload.h6
-rw-r--r--include/net/fq.h5
-rw-r--r--include/net/genetlink.h76
-rw-r--r--include/net/gro.h43
-rw-r--r--include/net/handshake.h5
-rw-r--r--include/net/ieee80211_radiotap.h3
-rw-r--r--include/net/ila.h16
-rw-r--r--include/net/inet6_hashtables.h81
-rw-r--r--include/net/inet_common.h2
-rw-r--r--include/net/inet_connection_sock.h7
-rw-r--r--include/net/inet_hashtables.h76
-rw-r--r--include/net/inet_sock.h101
-rw-r--r--include/net/ip.h17
-rw-r--r--include/net/ip6_fib.h64
-rw-r--r--include/net/ip6_route.h2
-rw-r--r--include/net/ip_tunnels.h1
-rw-r--r--include/net/ipv6.h4
-rw-r--r--include/net/iw_handler.h11
-rw-r--r--include/net/llc_c_ac.h1
-rw-r--r--include/net/llc_c_ev.h1
-rw-r--r--include/net/lwtunnel.h5
-rw-r--r--include/net/mac80211.h6
-rw-r--r--include/net/macsec.h2
-rw-r--r--include/net/mana/gdma.h20
-rw-r--r--include/net/mana/hw_channel.h5
-rw-r--r--include/net/mana/mana.h92
-rw-r--r--include/net/mptcp.h21
-rw-r--r--include/net/ndisc.h3
-rw-r--r--include/net/neighbour.h2
-rw-r--r--include/net/net_namespace.h4
-rw-r--r--include/net/netdev_rx_queue.h53
-rw-r--r--include/net/netfilter/nf_conntrack.h4
-rw-r--r--include/net/netfilter/nf_conntrack_acct.h2
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h2
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h3
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h1
-rw-r--r--include/net/netfilter/nf_tables.h127
-rw-r--r--include/net/netlink.h10
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/netns/nftables.h2
-rw-r--r--include/net/p8022.h3
-rw-r--r--include/net/page_pool.h402
-rw-r--r--include/net/page_pool/helpers.h238
-rw-r--r--include/net/page_pool/types.h236
-rw-r--r--include/net/pkt_cls.h14
-rw-r--r--include/net/route.h12
-rw-r--r--include/net/rtnetlink.h4
-rw-r--r--include/net/sch_generic.h28
-rw-r--r--include/net/sctp/sctp.h2
-rw-r--r--include/net/sctp/sm.h3
-rw-r--r--include/net/sctp/structs.h2
-rw-r--r--include/net/sock.h19
-rw-r--r--include/net/switchdev.h12
-rw-r--r--include/net/tcp.h41
-rw-r--r--include/net/tcx.h206
-rw-r--r--include/net/tls.h14
-rw-r--r--include/net/tls_prot.h68
-rw-r--r--include/net/transp_v6.h2
-rw-r--r--include/net/udp.h4
-rw-r--r--include/net/udplite.h2
-rw-r--r--include/net/vxlan.h4
-rw-r--r--include/net/xdp.h29
-rw-r--r--include/net/xdp_sock.h7
-rw-r--r--include/net/xdp_sock_drv.h54
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/net/xsk_buff_pool.h7
86 files changed, 1848 insertions, 764 deletions
diff --git a/include/net/Space.h b/include/net/Space.h
index 08ca9cef0213..c29f3d51c078 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -3,18 +3,11 @@
* ethernet adaptor have the name "eth[0123...]".
*/
-struct net_device *hp100_probe(int unit);
struct net_device *ultra_probe(int unit);
struct net_device *wd_probe(int unit);
struct net_device *ne_probe(int unit);
-struct net_device *fmv18x_probe(int unit);
-struct net_device *ni65_probe(int unit);
-struct net_device *sonic_probe(int unit);
struct net_device *smc_init(int unit);
struct net_device *cs89x0_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
struct net_device *cops_probe(int unit);
-
-/* Fibre Channel adapters */
-int iph5526_probe(struct net_device *dev);
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 0e7504a42925..b01cf9ac2437 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -201,7 +201,6 @@ static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
return !list_empty(&vsk->connected_table);
}
-void vsock_release_pending(struct sock *pending);
void vsock_add_pending(struct sock *listener, struct sock *pending);
void vsock_remove_pending(struct sock *listener, struct sock *pending);
void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
@@ -225,7 +224,6 @@ struct vsock_tap {
struct list_head list;
};
-int vsock_init_tap(void);
int vsock_add_tap(struct vsock_tap *vt);
int vsock_remove_tap(struct vsock_tap *vt);
void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index af729859385e..aa90adc3b2a4 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -386,6 +386,7 @@ struct bt_sock {
enum {
BT_SK_DEFER_SETUP,
BT_SK_SUSPEND,
+ BT_SK_PKT_STATUS
};
struct bt_sock_list {
@@ -400,6 +401,8 @@ int bt_sock_register(int proto, const struct net_proto_family *ops);
void bt_sock_unregister(int proto);
void bt_sock_link(struct bt_sock_list *l, struct sock *s);
void bt_sock_unlink(struct bt_sock_list *l, struct sock *s);
+struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
+ struct proto *prot, int proto, gfp_t prio, int kern);
int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags);
int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -430,10 +433,6 @@ struct l2cap_ctrl {
struct l2cap_chan *chan;
};
-struct sco_ctrl {
- u8 pkt_status;
-};
-
struct hci_dev;
typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
@@ -464,16 +463,18 @@ struct bt_skb_cb {
u8 force_active;
u16 expect;
u8 incoming:1;
+ u8 pkt_status:2;
union {
struct l2cap_ctrl l2cap;
- struct sco_ctrl sco;
struct hci_ctrl hci;
struct mgmt_ctrl mgmt;
+ struct scm_creds creds;
};
};
#define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
#define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type
+#define hci_skb_pkt_status(skb) bt_cb((skb))->pkt_status
#define hci_skb_expect(skb) bt_cb((skb))->expect
#define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode
#define hci_skb_event(skb) bt_cb((skb))->hci.req_event
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 872dcb91a540..87d92accc26e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -309,6 +309,26 @@ enum {
* to support it.
*/
HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT,
+
+ /* When this quirk is set, MSFT extension monitor tracking by
+ * address filter is supported. Since tracking quantity of each
+ * pattern is limited, this feature supports tracking multiple
+ * devices concurrently if controller supports multiple
+ * address filters.
+ *
+ * This quirk must be set before hci_register_dev is called.
+ */
+ HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER,
+
+ /*
+ * When this quirk is set, LE Coded PHY shall not be used. This is
+ * required for some Intel controllers which erroneously claim to
+ * support it but it causes problems with extended scanning.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_LE_CODED,
};
/* HCI device flags */
@@ -577,6 +597,7 @@ enum {
#define HCI_LE_CIS_CENTRAL 0x10
#define HCI_LE_CIS_PERIPHERAL 0x20
#define HCI_LE_ISO_BROADCASTER 0x40
+#define HCI_LE_ISO_SYNC_RECEIVER 0x80
/* Connection modes */
#define HCI_CM_ACTIVE 0x0000
@@ -2760,6 +2781,17 @@ struct hci_ev_le_enh_conn_complete {
__u8 clk_accurancy;
} __packed;
+#define HCI_EV_LE_PER_ADV_REPORT 0x0f
+struct hci_ev_le_per_adv_report {
+ __le16 sync_handle;
+ __u8 tx_power;
+ __u8 rssi;
+ __u8 cte_type;
+ __u8 data_status;
+ __u8 length;
+ __u8 data[];
+} __packed;
+
#define HCI_EV_LE_EXT_ADV_SET_TERM 0x12
struct hci_evt_le_ext_adv_set_term {
__u8 status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index e01d52cb668c..e6359f7346f1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -83,7 +83,7 @@ struct discovery_state {
u8 last_adv_addr_type;
s8 last_adv_rssi;
u32 last_adv_flags;
- u8 last_adv_data[HCI_MAX_AD_LENGTH];
+ u8 last_adv_data[HCI_MAX_EXT_AD_LENGTH];
u8 last_adv_data_len;
bool report_invalid_rssi;
bool result_filtering;
@@ -290,7 +290,7 @@ struct adv_pattern {
__u8 ad_type;
__u8 offset;
__u8 length;
- __u8 value[HCI_MAX_AD_LENGTH];
+ __u8 value[HCI_MAX_EXT_AD_LENGTH];
};
struct adv_rssi_thresholds {
@@ -321,8 +321,8 @@ struct adv_monitor {
#define HCI_MAX_SHORT_NAME_LENGTH 10
-#define HCI_CONN_HANDLE_UNSET 0xffff
#define HCI_CONN_HANDLE_MAX 0x0eff
+#define HCI_CONN_HANDLE_UNSET(_handle) (_handle > HCI_CONN_HANDLE_MAX)
/* Min encryption key size to match with SMP */
#define HCI_MIN_ENC_KEY_SIZE 7
@@ -726,7 +726,7 @@ struct hci_conn {
__u16 le_conn_interval;
__u16 le_conn_latency;
__u16 le_supv_timeout;
- __u8 le_adv_data[HCI_MAX_AD_LENGTH];
+ __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH];
__u8 le_adv_data_len;
__u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH];
__u8 le_per_adv_data_len;
@@ -739,6 +739,7 @@ struct hci_conn {
unsigned long flags;
enum conn_reasons conn_reason;
+ __u8 abort_reason;
__u32 clock;
__u16 clock_accuracy;
@@ -758,7 +759,6 @@ struct hci_conn {
struct delayed_work auto_accept_work;
struct delayed_work idle_work;
struct delayed_work le_conn_timeout;
- struct work_struct le_scan_cleanup;
struct device dev;
struct dentry *debugfs;
@@ -974,6 +974,12 @@ enum {
HCI_CONN_SCANNING,
HCI_CONN_AUTH_FAILURE,
HCI_CONN_PER_ADV,
+ HCI_CONN_BIG_CREATED,
+ HCI_CONN_CREATE_CIS,
+ HCI_CONN_BIG_SYNC,
+ HCI_CONN_BIG_SYNC_FAILED,
+ HCI_CONN_PA_SYNC,
+ HCI_CONN_PA_SYNC_FAILED,
};
static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
@@ -1093,8 +1099,7 @@ static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle)
}
static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
- bdaddr_t *ba,
- __u8 big, __u8 bis)
+ bdaddr_t *ba, __u8 bis)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
@@ -1105,7 +1110,33 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev,
if (bacmp(&c->dst, ba) || c->type != ISO_LINK)
continue;
- if (c->iso_qos.bcast.big == big && c->iso_qos.bcast.bis == bis) {
+ if (c->iso_qos.bcast.bis == bis) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *
+hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev,
+ bdaddr_t *ba,
+ __u8 big, __u8 bis)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (bacmp(&c->dst, ba) || c->type != ISO_LINK ||
+ !test_bit(HCI_CONN_PER_ADV, &c->flags))
+ continue;
+
+ if (c->iso_qos.bcast.big == big &&
+ c->iso_qos.bcast.bis == bis) {
rcu_read_unlock();
return c;
}
@@ -1190,7 +1221,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
continue;
/* Match CIG ID if set */
@@ -1222,7 +1253,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
+ if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY))
continue;
if (handle == c->iso_qos.ucast.cig) {
@@ -1259,6 +1290,52 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
return NULL;
}
+static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *hdev,
+ __u8 handle)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type != ISO_LINK)
+ continue;
+
+ if (handle != BT_ISO_QOS_BIG_UNSET && handle == c->iso_qos.bcast.big) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *
+hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type != ISO_LINK ||
+ !test_bit(HCI_CONN_PA_SYNC, &c->flags))
+ continue;
+
+ if (c->iso_qos.bcast.big == big) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev,
__u8 type, __u16 state)
{
@@ -1320,11 +1397,33 @@ static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev)
return NULL;
}
+/* Returns true if an le connection is in the scanning state */
+static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type == LE_LINK && c->state == BT_CONNECT &&
+ test_bit(HCI_CONN_SCANNING, &c->flags)) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return false;
+}
+
int hci_disconnect(struct hci_conn *conn, __u8 reason);
bool hci_setup_sync(struct hci_conn *conn, __u16 handle);
void hci_sco_setup(struct hci_conn *conn, __u8 status);
bool hci_iso_setup_path(struct hci_conn *conn);
-int hci_le_create_cis(struct hci_conn *conn);
+int hci_le_create_cis_pending(struct hci_dev *hdev);
+int hci_conn_check_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role);
@@ -1351,6 +1450,9 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
__u16 setting, struct bt_codec *codec);
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base);
struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, struct bt_iso_qos *qos);
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
@@ -1358,7 +1460,8 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 data_len, __u8 *data);
int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
__u8 sid, struct bt_iso_qos *qos);
-int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
+int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[]);
int hci_conn_check_link_mode(struct hci_conn *conn);
int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level);
@@ -1369,6 +1472,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role);
void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
void hci_conn_failed(struct hci_conn *conn, u8 status);
+u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle);
/*
* hci_conn_get() and hci_conn_put() are used to control the life-time of an
@@ -1713,7 +1817,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M))
-#define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED))
+#define le_coded_capable(dev) (((dev)->le_features[1] & HCI_LE_PHY_CODED) && \
+ !test_bit(HCI_QUIRK_BROKEN_LE_CODED, \
+ &(dev)->quirks))
#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
@@ -1745,6 +1851,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
/* Extended advertising support */
#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
+/* Maximum advertising length */
+#define max_adv_len(dev) \
+ (ext_adv_capable(dev) ? HCI_MAX_EXT_AD_LENGTH : HCI_MAX_AD_LENGTH)
+
/* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 1789:
*
* C24: Mandatory if the LE Controller supports Connection State and either
@@ -1765,6 +1875,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define cis_peripheral_capable(dev) \
((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL)
#define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER)
+#define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER)
#define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \
(!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks)))
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 2495be4d8b82..57eeb07aeb25 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -5,6 +5,9 @@
* Copyright (C) 2021 Intel Corporation
*/
+#define UINT_PTR(_handle) ((void *)((uintptr_t)_handle))
+#define PTR_UINT(_ptr) ((uintptr_t)((void *)_ptr))
+
typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data);
typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data,
int err);
@@ -124,7 +127,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason);
int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn);
-int hci_le_create_cis_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_le_create_cis_sync(struct hci_dev *hdev);
int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 5e68b3dd4422..d382679efd2b 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -111,6 +111,8 @@ struct mgmt_rp_read_index_list {
#define MGMT_SETTING_WIDEBAND_SPEECH BIT(17)
#define MGMT_SETTING_CIS_CENTRAL BIT(18)
#define MGMT_SETTING_CIS_PERIPHERAL BIT(19)
+#define MGMT_SETTING_ISO_BROADCASTER BIT(20)
+#define MGMT_SETTING_ISO_SYNC_RECEIVER BIT(21)
#define MGMT_OP_READ_INFO 0x0004
#define MGMT_READ_INFO_SIZE 0
diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h
index 1aa2e14b6c94..f40ddb4264fc 100644
--- a/include/net/bluetooth/sco.h
+++ b/include/net/bluetooth/sco.h
@@ -46,6 +46,4 @@ struct sco_conninfo {
__u8 dev_class[3];
};
-#define SCO_CMSG_PKT_STATUS 0x01
-
#endif /* __SCO_H */
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index a016f275cb01..c5e57c6bd873 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -301,7 +301,6 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave);
int bond_3ad_set_carrier(struct bonding *bond);
-void bond_3ad_update_lacp_active(struct bonding *bond);
void bond_3ad_update_lacp_rate(struct bonding *bond);
void bond_3ad_update_ad_actor_settings(struct bonding *bond);
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 30ac427cf0c6..5b8b1b644a2d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -722,23 +722,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
}
/* Caller must hold rcu_read_lock() for read */
-static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
+static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac)
{
struct list_head *iter;
struct slave *tmp;
- struct netdev_hw_addr *ha;
bond_for_each_slave_rcu(bond, tmp, iter)
if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
return true;
-
- if (netdev_uc_empty(bond->dev))
- return false;
-
- netdev_for_each_uc_addr(ha, bond->dev)
- if (ether_addr_equal_64bits(mac, ha->addr))
- return true;
-
return false;
}
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index f90f0021f5f2..4dabeb6c76d3 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -16,6 +16,7 @@
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>
#include <net/ip.h>
+#include <net/xdp.h>
/* 0 - Reserved to indicate value not set
* 1..NR_CPUS - Reserved for sender_cpu
diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index bd5440977f7f..5ee7b322e18b 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -33,9 +33,6 @@ struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info,
int mtu_size);
struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
-void cfsrvl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
- int phyid);
-
bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
void cfsrvl_init(struct cfsrvl *service,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7c7d03aa9d06..3a4b684f89bf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -263,7 +263,7 @@ enum ieee80211_privacy {
* are only for driver use when pointers to this structure are
* passed around.
*
- * @flags: rate-specific flags
+ * @flags: rate-specific flags from &enum ieee80211_rate_flags
* @bitrate: bitrate in units of 100 Kbps
* @hw_value: driver/hardware value for this rate
* @hw_value_short: driver/hardware value for this rate when
@@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
return NULL;
+ if (iftype == NL80211_IFTYPE_AP_VLAN)
+ iftype = NL80211_IFTYPE_AP;
+
for (i = 0; i < sband->n_iftype_data; i++) {
const struct ieee80211_sband_iftype_data *data =
&sband->iftype_data[i];
@@ -808,7 +811,7 @@ struct cfg80211_tid_cfg {
struct cfg80211_tid_config {
const u8 *peer;
u32 n_tid_conf;
- struct cfg80211_tid_cfg tid_conf[];
+ struct cfg80211_tid_cfg tid_conf[] __counted_by(n_tid_conf);
};
/**
@@ -1184,7 +1187,7 @@ struct cfg80211_mbssid_elems {
struct {
const u8 *data;
size_t len;
- } elem[];
+ } elem[] __counted_by(cnt);
};
/**
@@ -1201,7 +1204,7 @@ struct cfg80211_rnr_elems {
struct {
const u8 *data;
size_t len;
- } elem[];
+ } elem[] __counted_by(cnt);
};
/**
@@ -1279,7 +1282,7 @@ struct cfg80211_acl_data {
int n_acl_entries;
/* Keep it last */
- struct mac_address mac_addrs[];
+ struct mac_address mac_addrs[] __counted_by(n_acl_entries);
};
/**
@@ -1350,7 +1353,7 @@ struct cfg80211_unsol_bcast_probe_resp {
* @twt_responder: Enable Target Wait Time
* @he_required: stations must support HE
* @sae_h2e_required: stations must support direct H2E technique in SAE
- * @flags: flags, as defined in enum cfg80211_ap_settings_flags
+ * @flags: flags, as defined in &enum nl80211_ap_settings_flags
* @he_obss_pd: OBSS Packet Detection settings
* @he_oper: HE operation IE (or %NULL if HE isn't enabled)
* @fils_discovery: FILS discovery transmission parameters
@@ -1479,7 +1482,6 @@ struct iface_combination_params {
* @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp)
* @STATION_PARAM_APPLY_CAPABILITY: apply new capability
* @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state
- * @STATION_PARAM_APPLY_STA_TXPOWER: apply tx power for STA
*
* Not all station parameters have in-band "no change" signalling,
* for those that don't these flags will are used.
@@ -2153,7 +2155,7 @@ enum mpath_info_flags {
* @sn: target sequence number
* @metric: metric (cost) of this mesh path
* @exptime: expiration time for the mesh path from now, in msecs
- * @flags: mesh path flags
+ * @flags: mesh path flags from &enum mesh_path_flags
* @discovery_timeout: total mesh path discovery timeout, in msecs
* @discovery_retries: mesh path discovery retries
* @generation: generation number for nl80211 dumps.
@@ -2493,7 +2495,7 @@ struct cfg80211_scan_6ghz_params {
* the actual dwell time may be shorter.
* @duration_mandatory: if set, the scan duration must be as specified by the
* %duration field.
- * @flags: bit field of flags controlling operation
+ * @flags: control flags from &enum nl80211_scan_flags
* @rates: bitmap of rates to advertise for each band
* @wiphy: the wiphy this was for
* @scan_start: time (in jiffies) when the scan started
@@ -2541,7 +2543,7 @@ struct cfg80211_scan_request {
struct cfg80211_scan_6ghz_params *scan_6ghz_params;
/* keep last */
- struct ieee80211_channel *channels[];
+ struct ieee80211_channel *channels[] __counted_by(n_channels);
};
static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
@@ -2613,7 +2615,7 @@ struct cfg80211_bss_select_adjust {
* @scan_width: channel width for scanning
* @ie: optional information element(s) to add into Probe Request or %NULL
* @ie_len: length of ie in octets
- * @flags: bit field of flags controlling operation
+ * @flags: control flags from &enum nl80211_scan_flags
* @match_sets: sets of parameters to be matched for a scan result
* entry to be considered valid and to be passed to the host
* (others are filtered out).
@@ -3945,7 +3947,7 @@ struct cfg80211_pmsr_request {
struct list_head list;
- struct cfg80211_pmsr_request_peer peers[];
+ struct cfg80211_pmsr_request_peer peers[] __counted_by(n_peers);
};
/**
@@ -8115,7 +8117,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
* @link_id: the ID of the link the frame was received on
* @buf: Management frame (header + body)
* @len: length of the frame data
- * @flags: flags, as defined in enum nl80211_rxmgmt_flags
+ * @flags: flags, as defined in &enum nl80211_rxmgmt_flags
* @rx_tstamp: Hardware timestamp of frame RX in nanoseconds
* @ack_tstamp: Hardware timestamp of ack TX in nanoseconds
*/
diff --git a/include/net/datalink.h b/include/net/datalink.h
index c837ffc7ebf8..6c529a40e00d 100644
--- a/include/net/datalink.h
+++ b/include/net/datalink.h
@@ -23,6 +23,4 @@ struct datalink_proto {
struct list_head node;
};
-struct datalink_proto *make_EII_client(void);
-void destroy_EII_client(struct datalink_proto *dl);
#endif
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 0cdb4b16e5b5..29fd1b4ee654 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1583,6 +1583,24 @@ void devlink_free(struct devlink *devlink);
* Should be used by device drivers set
* the admin state of a function managed
* by the devlink port.
+ * @port_fn_ipsec_crypto_get: Callback used to get port function's ipsec_crypto
+ * capability. Should be used by device drivers
+ * to report the current state of ipsec_crypto
+ * capability of a function managed by the devlink
+ * port.
+ * @port_fn_ipsec_crypto_set: Callback used to set port function's ipsec_crypto
+ * capability. Should be used by device drivers to
+ * enable/disable ipsec_crypto capability of a
+ * function managed by the devlink port.
+ * @port_fn_ipsec_packet_get: Callback used to get port function's ipsec_packet
+ * capability. Should be used by device drivers
+ * to report the current state of ipsec_packet
+ * capability of a function managed by the devlink
+ * port.
+ * @port_fn_ipsec_packet_set: Callback used to set port function's ipsec_packet
+ * capability. Should be used by device drivers to
+ * enable/disable ipsec_packet capability of a
+ * function managed by the devlink port.
*
* Note: Driver should return -EOPNOTSUPP if it doesn't support
* port function (@port_fn_*) handling for a particular port.
@@ -1620,6 +1638,18 @@ struct devlink_port_ops {
int (*port_fn_state_set)(struct devlink_port *port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_crypto_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_crypto_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_packet_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
};
void devlink_port_init(struct devlink *devlink,
@@ -1743,9 +1773,6 @@ int devl_resource_size_get(struct devlink *devlink,
int devl_dpipe_table_resource_set(struct devlink *devlink,
const char *table_name, u64 resource_id,
u64 resource_units);
-int devlink_dpipe_table_resource_set(struct devlink *devlink,
- const char *table_name, u64 resource_id,
- u64 resource_units);
void devl_resource_occ_get_register(struct devlink *devlink,
u64 resource_id,
devlink_resource_occ_get_t *occ_get,
@@ -1790,8 +1817,6 @@ devlink_port_region_create(struct devlink_port *port,
u32 region_max_snapshots, u64 region_size);
void devl_region_destroy(struct devlink_region *region);
void devlink_region_destroy(struct devlink_region *region);
-void devlink_port_region_destroy(struct devlink_region *region);
-
int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id);
void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id);
int devlink_region_snapshot_create(struct devlink_region *region,
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a2b953b57689..a587e83fc169 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -30,6 +30,7 @@
FN(TCP_OVERWINDOW) \
FN(TCP_OFOMERGE) \
FN(TCP_RFC7323_PAWS) \
+ FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
FN(TCP_RESET) \
FN(TCP_INVALID_SYN) \
@@ -78,6 +79,7 @@
FN(IPV6_NDISC_BAD_CODE) \
FN(IPV6_NDISC_BAD_OPTIONS) \
FN(IPV6_NDISC_NS_OTHERHOST) \
+ FN(QUEUE_PURGE) \
FNe(MAX)
/**
@@ -188,6 +190,8 @@ enum skb_drop_reason {
* LINUX_MIB_PAWSESTABREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
+ /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
+ SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
/** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
@@ -339,6 +343,8 @@ enum skb_drop_reason {
* for another host.
*/
SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
+ /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
+ SKB_DROP_REASON_QUEUE_PURGE,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 685fb37df8e8..56cb7be92244 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -23,6 +23,12 @@ enum skb_drop_reason_subsys {
*/
SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR,
+ /**
+ * @SKB_DROP_REASON_SUBSYS_OPENVSWITCH: openvswitch drop reasons,
+ * see net/openvswitch/drop.h
+ */
+ SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+
/** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */
SKB_DROP_REASON_SUBSYS_NUM
};
diff --git a/include/net/dsa.h b/include/net/dsa.h
index d309ee7ed04b..0b9c6aa27047 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -873,8 +873,6 @@ struct dsa_switch_ops {
struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
int port,
phy_interface_t iface);
- int (*phylink_mac_link_state)(struct dsa_switch *ds, int port,
- struct phylink_link_state *state);
int (*phylink_mac_prepare)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
@@ -884,7 +882,6 @@ struct dsa_switch_ops {
int (*phylink_mac_finish)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
- void (*phylink_mac_an_restart)(struct dsa_switch *ds, int port);
void (*phylink_mac_link_down)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 632086b2f644..6d1c8541183d 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -23,7 +23,7 @@ struct dst_ops {
u32 * (*cow_metrics)(struct dst_entry *, unsigned long);
void (*destroy)(struct dst_entry *);
void (*ifdown)(struct dst_entry *,
- struct net_device *dev, int how);
+ struct net_device *dev);
struct dst_entry * (*negative_advice)(struct dst_entry *);
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8664ed4fbbdf..1a7131d6cb0e 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -302,6 +302,14 @@ struct flow_dissector_key_l2tpv3 {
};
/**
+ * struct flow_dissector_key_ipsec:
+ * @spi: identifier for a ipsec connection
+ */
+struct flow_dissector_key_ipsec {
+ __be32 spi;
+};
+
+/**
* struct flow_dissector_key_cfm
* @mdl_ver: maintenance domain level (mdl) and cfm protocol version
* @opcode: code specifying a type of cfm protocol packet
@@ -354,6 +362,7 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */
FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */
FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */
+ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */
FLOW_DISSECTOR_KEY_MAX,
};
@@ -370,7 +379,8 @@ struct flow_dissector_key {
};
struct flow_dissector {
- unsigned int used_keys; /* each bit repesents presence of one key id */
+ unsigned long long used_keys;
+ /* each bit represents presence of one key id */
unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
};
@@ -430,7 +440,7 @@ void skb_flow_get_icmp_tci(const struct sk_buff *skb,
static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
{
- return flow_dissector->used_keys & (1 << key_id);
+ return flow_dissector->used_keys & (1ULL << key_id);
}
static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 118082eae48c..9efa9a59e81f 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -64,6 +64,10 @@ struct flow_match_tcp {
struct flow_dissector_key_tcp *key, *mask;
};
+struct flow_match_ipsec {
+ struct flow_dissector_key_ipsec *key, *mask;
+};
+
struct flow_match_mpls {
struct flow_dissector_key_mpls *key, *mask;
};
@@ -116,6 +120,8 @@ void flow_rule_match_ports_range(const struct flow_rule *rule,
struct flow_match_ports_range *out);
void flow_rule_match_tcp(const struct flow_rule *rule,
struct flow_match_tcp *out);
+void flow_rule_match_ipsec(const struct flow_rule *rule,
+ struct flow_match_ipsec *out);
void flow_rule_match_icmp(const struct flow_rule *rule,
struct flow_match_icmp *out);
void flow_rule_match_mpls(const struct flow_rule *rule,
diff --git a/include/net/fq.h b/include/net/fq.h
index 07b5aff6ec58..99fbe4127b95 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -98,9 +98,4 @@ typedef bool fq_skb_filter_t(struct fq *,
struct sk_buff *,
void *);
-typedef struct fq_flow *fq_flow_get_default_t(struct fq *,
- struct fq_tin *,
- int idx,
- struct sk_buff *);
-
#endif
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index ed4622dd4828..e18a4c0d69ee 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -93,9 +93,9 @@ struct genl_family {
* struct genl_info - receiving information
* @snd_seq: sending sequence number
* @snd_portid: netlink portid of sender
+ * @family: generic netlink family
* @nlhdr: netlink message header
* @genlhdr: generic netlink message header
- * @userhdr: user specific header
* @attrs: netlink attributes
* @_net: network namespace
* @user_ptr: user pointers
@@ -104,16 +104,16 @@ struct genl_family {
struct genl_info {
u32 snd_seq;
u32 snd_portid;
- struct nlmsghdr * nlhdr;
+ const struct genl_family *family;
+ const struct nlmsghdr * nlhdr;
struct genlmsghdr * genlhdr;
- void * userhdr;
struct nlattr ** attrs;
possible_net_t _net;
void * user_ptr[2];
struct netlink_ext_ack *extack;
};
-static inline struct net *genl_info_net(struct genl_info *info)
+static inline struct net *genl_info_net(const struct genl_info *info)
{
return read_pnet(&info->_net);
}
@@ -123,6 +123,11 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
write_pnet(&info->_net, net);
}
+static inline void *genl_info_userhdr(const struct genl_info *info)
+{
+ return (u8 *)info->genlhdr + GENL_HDRLEN;
+}
+
#define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
#define GENL_SET_ERR_MSG_FMT(info, msg, args...) \
@@ -244,14 +249,13 @@ struct genl_split_ops {
/**
* struct genl_dumpit_info - info that is available during dumpit op call
- * @family: generic netlink family - for internal genl code usage
* @op: generic netlink ops - for internal genl code usage
* @attrs: netlink attributes
+ * @info: struct genl_info describing the request
*/
struct genl_dumpit_info {
- const struct genl_family *family;
struct genl_split_ops op;
- struct nlattr **attrs;
+ struct genl_info info;
};
static inline const struct genl_dumpit_info *
@@ -260,6 +264,38 @@ genl_dumpit_info(struct netlink_callback *cb)
return cb->data;
}
+static inline const struct genl_info *
+genl_info_dump(struct netlink_callback *cb)
+{
+ return &genl_dumpit_info(cb)->info;
+}
+
+/**
+ * genl_info_init_ntf() - initialize genl_info for notifications
+ * @info: genl_info struct to set up
+ * @family: pointer to the genetlink family
+ * @cmd: command to be used in the notification
+ *
+ * Initialize a locally declared struct genl_info to pass to various APIs.
+ * Intended to be used when creating notifications.
+ */
+static inline void
+genl_info_init_ntf(struct genl_info *info, const struct genl_family *family,
+ u8 cmd)
+{
+ struct genlmsghdr *hdr = (void *) &info->user_ptr[0];
+
+ memset(info, 0, sizeof(*info));
+ info->family = family;
+ info->genlhdr = hdr;
+ hdr->cmd = cmd;
+}
+
+static inline bool genl_info_is_ntf(const struct genl_info *info)
+{
+ return !info->nlhdr;
+}
+
int genl_register_family(struct genl_family *family);
int genl_unregister_family(const struct genl_family *family);
void genl_notify(const struct genl_family *family, struct sk_buff *skb,
@@ -268,6 +304,32 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb,
void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
const struct genl_family *family, int flags, u8 cmd);
+static inline void *
+__genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags)
+{
+ return genlmsg_put(skb, info->snd_portid, info->snd_seq, info->family,
+ flags, info->genlhdr->cmd);
+}
+
+/**
+ * genlmsg_iput - start genetlink message based on genl_info
+ * @skb: skb in which message header will be placed
+ * @info: genl_info as provided to do/dump handlers
+ *
+ * Convenience wrapper which starts a genetlink message based on
+ * information in user request. @info should be either the struct passed
+ * by genetlink core to do/dump handlers (when constructing replies to
+ * such requests) or a struct initialized by genl_info_init_ntf()
+ * when constructing notifications.
+ *
+ * Returns pointer to new genetlink header.
+ */
+static inline void *
+genlmsg_iput(struct sk_buff *skb, const struct genl_info *info)
+{
+ return __genlmsg_iput(skb, info, 0);
+}
+
/**
* genlmsg_nlhdr - Obtain netlink header from user specified header
* @user_hdr: user header as returned from genlmsg_put()
diff --git a/include/net/gro.h b/include/net/gro.h
index 75efa6fb8441..88644b3ca660 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -452,6 +452,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
gro_normal_list(napi);
}
+/* This function is the alternative of 'inet_iif' and 'inet_sdif'
+ * functions in case we can not rely on fields of IPCB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ *iif = inet_iif(skb) ?: skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
+/* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
+ * functions in case we can not rely on fields of IP6CB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ /* using skb->dev->ifindex because skb_dst(skb) is not initialized */
+ *iif = skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
extern struct list_head offload_base;
#endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/handshake.h b/include/net/handshake.h
index 2e26e436e85f..8ebd4f9ed26e 100644
--- a/include/net/handshake.h
+++ b/include/net/handshake.h
@@ -40,5 +40,10 @@ int tls_server_hello_x509(const struct tls_handshake_args *args, gfp_t flags);
int tls_server_hello_psk(const struct tls_handshake_args *args, gfp_t flags);
bool tls_handshake_cancel(struct sock *sk);
+void tls_handshake_close(struct socket *sock);
+
+u8 tls_get_record_type(const struct sock *sk, const struct cmsghdr *msg);
+void tls_alert_recv(const struct sock *sk, const struct msghdr *msg,
+ u8 *level, u8 *description);
#endif /* _NET_HANDSHAKE_H */
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index c4722a9963de..2338f8d2a8b3 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -21,7 +21,7 @@
#include <asm/unaligned.h>
/**
- * struct ieee82011_radiotap_header - base radiotap header
+ * struct ieee80211_radiotap_header - base radiotap header
*/
struct ieee80211_radiotap_header {
/**
@@ -575,6 +575,7 @@ enum ieee80211_radiotap_eht_usig_tb {
/**
* ieee80211_get_radiotap_len - get radiotap header length
+ * @data: pointer to the header
*/
static inline u16 ieee80211_get_radiotap_len(const char *data)
{
diff --git a/include/net/ila.h b/include/net/ila.h
deleted file mode 100644
index 73ebe5eab272..000000000000
--- a/include/net/ila.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * ILA kernel interface
- *
- * Copyright (c) 2015 Tom Herbert <tom@herbertland.com>
- */
-
-#ifndef _NET_ILA_H
-#define _NET_ILA_H
-
-struct sk_buff;
-
-int ila_xlat_outgoing(struct sk_buff *skb);
-int ila_xlat_incoming(struct sk_buff *skb);
-
-#endif /* _NET_ILA_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 56f1286583d3..533a7337865a 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -48,6 +48,22 @@ struct sock *__inet6_lookup_established(struct net *net,
const u16 hnum, const int dif,
const int sdif);
+typedef u32 (inet6_ehashfn_t)(const struct net *net,
+ const struct in6_addr *laddr, const u16 lport,
+ const struct in6_addr *faddr, const __be16 fport);
+
+inet6_ehashfn_t inet6_ehashfn;
+
+INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn);
+
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum,
+ inet6_ehashfn_t *ehashfn);
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -57,6 +73,15 @@ struct sock *inet6_lookup_listener(struct net *net,
const unsigned short hnum,
const int dif, const int sdif);
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum, const int dif,
+ inet6_ehashfn_t *ehashfn);
+
static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -78,6 +103,46 @@ static inline struct sock *__inet6_lookup(struct net *net,
daddr, hnum, dif, sdif);
}
+static inline
+struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr, const __be16 sport,
+ const struct in6_addr *daddr, const __be16 dport,
+ bool *refcounted, inet6_ehashfn_t *ehashfn)
+{
+ struct sock *sk, *reuse_sk;
+ bool prefetched;
+
+ sk = skb_steal_sock(skb, refcounted, &prefetched);
+ if (!sk)
+ return NULL;
+
+ if (!prefetched || !sk_fullsock(sk))
+ return sk;
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_state != TCP_LISTEN)
+ return sk;
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ if (sk->sk_state != TCP_CLOSE)
+ return sk;
+ } else {
+ return sk;
+ }
+
+ reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, ntohs(dport),
+ ehashfn);
+ if (!reuse_sk)
+ return sk;
+
+ /* We've chosen a new reuseport sock which is never refcounted. This
+ * implies that sk also isn't refcounted.
+ */
+ WARN_ON_ONCE(*refcounted);
+
+ return reuse_sk;
+}
+
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be16 sport,
@@ -85,14 +150,20 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
int iif, int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb, refcounted);
-
+ struct net *net = dev_net(skb_dst(skb)->dev);
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct sock *sk;
+
+ sk = inet6_steal_sock(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, dport,
+ refcounted, inet6_ehashfn);
+ if (IS_ERR(sk))
+ return NULL;
if (sk)
return sk;
- return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
- doff, &ipv6_hdr(skb)->saddr, sport,
- &ipv6_hdr(skb)->daddr, ntohs(dport),
+ return __inet6_lookup(net, hashinfo, skb,
+ doff, &ip6h->saddr, sport,
+ &ip6h->daddr, ntohs(dport),
iif, sdif, refcounted);
}
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index b86b8e21de7f..f50a644d87a9 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -40,8 +40,10 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
+int __inet_listen_sk(struct sock *sk, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
/* Don't allocate port at this moment, defer to connect. */
#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
/* Grab and release socket lock. */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c2b15f7e5516..5d2fcc137b88 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -164,7 +164,8 @@ enum inet_csk_ack_state_t {
ICSK_ACK_TIMER = 2,
ICSK_ACK_PUSHED = 4,
ICSK_ACK_PUSHED2 = 8,
- ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
+ ICSK_ACK_NOW = 16, /* Send the next ACK immediately (once) */
+ ICSK_ACK_NOMEM = 32,
};
void inet_csk_init_xmit_timers(struct sock *sk,
@@ -341,9 +342,9 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline bool inet_csk_has_ulp(struct sock *sk)
+static inline bool inet_csk_has_ulp(const struct sock *sk)
{
- return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+ return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops;
}
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 99bd823e97f6..3ecfeadbfa06 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -177,7 +177,7 @@ struct inet_hashinfo {
struct inet_listen_hashbucket *lhash2;
bool pernet;
-};
+} ____cacheline_aligned_in_smp;
static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
{
@@ -379,6 +379,27 @@ struct sock *__inet_lookup_established(struct net *net,
const __be32 daddr, const u16 hnum,
const int dif, const int sdif);
+typedef u32 (inet_ehashfn_t)(const struct net *net,
+ const __be32 laddr, const __u16 lport,
+ const __be32 faddr, const __be16 fport);
+
+inet_ehashfn_t inet_ehashfn;
+
+INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn);
+
+struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum,
+ inet_ehashfn_t *ehashfn);
+
+struct sock *inet_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum, const int dif,
+ inet_ehashfn_t *ehashfn);
+
static inline struct sock *
inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
@@ -428,6 +449,46 @@ static inline struct sock *inet_lookup(struct net *net,
return sk;
}
+static inline
+struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+ bool *refcounted, inet_ehashfn_t *ehashfn)
+{
+ struct sock *sk, *reuse_sk;
+ bool prefetched;
+
+ sk = skb_steal_sock(skb, refcounted, &prefetched);
+ if (!sk)
+ return NULL;
+
+ if (!prefetched || !sk_fullsock(sk))
+ return sk;
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_state != TCP_LISTEN)
+ return sk;
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ if (sk->sk_state != TCP_CLOSE)
+ return sk;
+ } else {
+ return sk;
+ }
+
+ reuse_sk = inet_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, ntohs(dport),
+ ehashfn);
+ if (!reuse_sk)
+ return sk;
+
+ /* We've chosen a new reuseport sock which is never refcounted. This
+ * implies that sk also isn't refcounted.
+ */
+ WARN_ON_ONCE(*refcounted);
+
+ return reuse_sk;
+}
+
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
int doff,
@@ -436,22 +497,23 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
const int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb, refcounted);
+ struct net *net = dev_net(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
+ struct sock *sk;
+ sk = inet_steal_sock(net, skb, doff, iph->saddr, sport, iph->daddr, dport,
+ refcounted, inet_ehashfn);
+ if (IS_ERR(sk))
+ return NULL;
if (sk)
return sk;
- return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ return __inet_lookup(net, hashinfo, skb,
doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb), sdif,
refcounted);
}
-u32 inet6_ehashfn(const struct net *net,
- const struct in6_addr *laddr, const u16 lport,
- const struct in6_addr *faddr, const __be16 fport);
-
static inline void sk_daddr_set(struct sock *sk, __be32 addr)
{
sk->sk_daddr = addr; /* alias of inet_daddr */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index caa20a905531..2de0e4d4a027 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark &&
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+ u32 mark = READ_ONCE(sk->sk_mark);
+
+ if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
- return sk->sk_mark;
+ return mark;
}
static inline int inet_request_bound_dev_if(const struct sock *sk,
@@ -193,13 +194,13 @@ struct rtable;
* @inet_rcv_saddr - Bound local IPv4 addr
* @inet_dport - Destination port
* @inet_num - Local port
+ * @inet_flags - various atomic flags
* @inet_saddr - Sending source
* @uc_ttl - Unicast TTL
* @inet_sport - Source port
* @inet_id - ID counter for DF pkts
* @tos - TOS
* @mc_ttl - Multicasting TTL
- * @is_icsk - is this an inet_connection_sock?
* @uc_index - Unicast outgoing device index
* @mc_index - Multicast device index
* @mc_list - Group array
@@ -217,57 +218,88 @@ struct inet_sock {
#define inet_dport sk.__sk_common.skc_dport
#define inet_num sk.__sk_common.skc_num
+ unsigned long inet_flags;
__be32 inet_saddr;
__s16 uc_ttl;
- __u16 cmsg_flags;
- struct ip_options_rcu __rcu *inet_opt;
__be16 inet_sport;
- __u16 inet_id;
+ struct ip_options_rcu __rcu *inet_opt;
+ atomic_t inet_id;
__u8 tos;
__u8 min_ttl;
__u8 mc_ttl;
__u8 pmtudisc;
- __u8 recverr:1,
- is_icsk:1,
- freebind:1,
- hdrincl:1,
- mc_loop:1,
- transparent:1,
- mc_all:1,
- nodefrag:1;
- __u8 bind_address_no_port:1,
- recverr_rfc4884:1,
- defer_connect:1; /* Indicates that fastopen_connect is set
- * and cookie exists so we defer connect
- * until first data frame is written
- */
__u8 rcv_tos;
__u8 convert_csum;
int uc_index;
int mc_index;
__be32 mc_addr;
- struct ip_mc_socklist __rcu *mc_list;
- struct inet_cork_full cork;
struct {
__u16 lo;
__u16 hi;
} local_port_range;
+
+ struct ip_mc_socklist __rcu *mc_list;
+ struct inet_cork_full cork;
};
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */
+enum {
+ INET_FLAGS_PKTINFO = 0,
+ INET_FLAGS_TTL = 1,
+ INET_FLAGS_TOS = 2,
+ INET_FLAGS_RECVOPTS = 3,
+ INET_FLAGS_RETOPTS = 4,
+ INET_FLAGS_PASSSEC = 5,
+ INET_FLAGS_ORIGDSTADDR = 6,
+ INET_FLAGS_CHECKSUM = 7,
+ INET_FLAGS_RECVFRAGSIZE = 8,
+
+ INET_FLAGS_RECVERR = 9,
+ INET_FLAGS_RECVERR_RFC4884 = 10,
+ INET_FLAGS_FREEBIND = 11,
+ INET_FLAGS_HDRINCL = 12,
+ INET_FLAGS_MC_LOOP = 13,
+ INET_FLAGS_MC_ALL = 14,
+ INET_FLAGS_TRANSPARENT = 15,
+ INET_FLAGS_IS_ICSK = 16,
+ INET_FLAGS_NODEFRAG = 17,
+ INET_FLAGS_BIND_ADDRESS_NO_PORT = 18,
+ INET_FLAGS_DEFER_CONNECT = 19,
+};
+
/* cmsg flags for inet */
-#define IP_CMSG_PKTINFO BIT(0)
-#define IP_CMSG_TTL BIT(1)
-#define IP_CMSG_TOS BIT(2)
-#define IP_CMSG_RECVOPTS BIT(3)
-#define IP_CMSG_RETOPTS BIT(4)
-#define IP_CMSG_PASSSEC BIT(5)
-#define IP_CMSG_ORIGDSTADDR BIT(6)
-#define IP_CMSG_CHECKSUM BIT(7)
-#define IP_CMSG_RECVFRAGSIZE BIT(8)
+#define IP_CMSG_PKTINFO BIT(INET_FLAGS_PKTINFO)
+#define IP_CMSG_TTL BIT(INET_FLAGS_TTL)
+#define IP_CMSG_TOS BIT(INET_FLAGS_TOS)
+#define IP_CMSG_RECVOPTS BIT(INET_FLAGS_RECVOPTS)
+#define IP_CMSG_RETOPTS BIT(INET_FLAGS_RETOPTS)
+#define IP_CMSG_PASSSEC BIT(INET_FLAGS_PASSSEC)
+#define IP_CMSG_ORIGDSTADDR BIT(INET_FLAGS_ORIGDSTADDR)
+#define IP_CMSG_CHECKSUM BIT(INET_FLAGS_CHECKSUM)
+#define IP_CMSG_RECVFRAGSIZE BIT(INET_FLAGS_RECVFRAGSIZE)
+
+#define IP_CMSG_ALL (IP_CMSG_PKTINFO | IP_CMSG_TTL | \
+ IP_CMSG_TOS | IP_CMSG_RECVOPTS | \
+ IP_CMSG_RETOPTS | IP_CMSG_PASSSEC | \
+ IP_CMSG_ORIGDSTADDR | IP_CMSG_CHECKSUM | \
+ IP_CMSG_RECVFRAGSIZE)
+
+static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
+{
+ return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL;
+}
+
+#define inet_test_bit(nr, sk) \
+ test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_set_bit(nr, sk) \
+ set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_clear_bit(nr, sk) \
+ clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet_assign_bit(nr, sk, val) \
+ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
static inline bool sk_is_inet(struct sock *sk)
{
@@ -362,7 +394,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
__u8 flags = 0;
- if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
+ if (inet_test_bit(TRANSPARENT, sk) || inet_test_bit(HDRINCL, sk))
flags |= FLOWI_FLAG_ANYSRC;
return flags;
}
@@ -388,7 +420,8 @@ static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
- inet->freebind || inet->transparent;
+ test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) ||
+ test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags);
}
static inline bool inet_addr_valid_or_nonlocal(struct net *net,
diff --git a/include/net/ip.h b/include/net/ip.h
index 50d435855ae2..19adacd5ece0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
{
ipcm_init(ipcm);
- ipcm->sockc.mark = inet->sk.sk_mark;
+ ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
@@ -538,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
* generator as much as we can.
*/
if (sk && inet_sk(sk)->inet_daddr) {
- iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += segs;
+ int val;
+
+ /* avoid atomic operations for TCP,
+ * as we hold socket lock at this point.
+ */
+ if (sk_is_tcp(sk)) {
+ sock_owned_by_me(sk);
+ val = atomic_read(&inet_sk(sk)->inet_id);
+ atomic_set(&inet_sk(sk)->inet_id, val + segs);
+ } else {
+ val = atomic_add_return(segs, &inet_sk(sk)->inet_id);
+ }
+ iph->id = htons(val);
return;
}
if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 05e6f756feaf..c9ff23cf313e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -179,6 +179,9 @@ struct fib6_info {
refcount_t fib6_ref;
unsigned long expires;
+
+ struct hlist_node gc_link;
+
struct dst_metrics *fib6_metrics;
#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1]
@@ -247,19 +250,6 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
return rt->fib6_src.plen > 0;
}
-static inline void fib6_clean_expires(struct fib6_info *f6i)
-{
- f6i->fib6_flags &= ~RTF_EXPIRES;
- f6i->expires = 0;
-}
-
-static inline void fib6_set_expires(struct fib6_info *f6i,
- unsigned long expires)
-{
- f6i->expires = expires;
- f6i->fib6_flags |= RTF_EXPIRES;
-}
-
static inline bool fib6_check_expired(const struct fib6_info *f6i)
{
if (f6i->fib6_flags & RTF_EXPIRES)
@@ -267,6 +257,11 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i)
return false;
}
+static inline bool fib6_has_expires(const struct fib6_info *f6i)
+{
+ return f6i->fib6_flags & RTF_EXPIRES;
+}
+
/* Function to safely get fn->fn_sernum for passed in rt
* and store result in passed in cookie.
* Return true if we can get cookie safely
@@ -388,6 +383,7 @@ struct fib6_table {
struct inet_peer_base tb6_peers;
unsigned int flags;
unsigned int fib_seq;
+ struct hlist_head tb6_gc_hlist; /* GC candidates */
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
@@ -504,6 +500,48 @@ void fib6_gc_cleanup(void);
int fib6_init(void);
+/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be
+ * NULL.
+ */
+static inline void fib6_set_expires_locked(struct fib6_info *f6i,
+ unsigned long expires)
+{
+ struct fib6_table *tb6;
+
+ tb6 = f6i->fib6_table;
+ f6i->expires = expires;
+ if (tb6 && !fib6_has_expires(f6i))
+ hlist_add_head(&f6i->gc_link, &tb6->tb6_gc_hlist);
+ f6i->fib6_flags |= RTF_EXPIRES;
+}
+
+/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be
+ * NULL. If fib6_table is NULL, the fib6_info will no be inserted into the
+ * list of GC candidates until it is inserted into a table.
+ */
+static inline void fib6_set_expires(struct fib6_info *f6i,
+ unsigned long expires)
+{
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_set_expires_locked(f6i, expires);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
+static inline void fib6_clean_expires_locked(struct fib6_info *f6i)
+{
+ if (fib6_has_expires(f6i))
+ hlist_del_init(&f6i->gc_link);
+ f6i->fib6_flags &= ~RTF_EXPIRES;
+ f6i->expires = 0;
+}
+
+static inline void fib6_clean_expires(struct fib6_info *f6i)
+{
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_clean_expires_locked(f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
struct ipv6_route_iter {
struct seq_net_private p;
struct fib6_walker w;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 3556595ce59a..b32539bb0fb0 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -156,7 +156,7 @@ void fib6_force_start_gc(struct net *net);
struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev,
const struct in6_addr *addr, bool anycast,
- gfp_t gfp_flags);
+ gfp_t gfp_flags, struct netlink_ext_ack *extack);
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index ed4b6ad3fcac..e8750b4ef7e1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -52,6 +52,7 @@ struct ip_tunnel_key {
u8 tos; /* TOS for IPv4, TC for IPv6 */
u8 ttl; /* TTL for IPv4, HL for IPv6 */
__be32 label; /* Flow Label for IPv6 */
+ u32 nhid;
__be16 tp_src;
__be16 tp_dst;
__u8 flow_flags;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2acc4c808d45..d40d8238d4c2 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -937,7 +937,8 @@ static inline bool ipv6_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
return net->ipv6.sysctl.ip_nonlocal_bind ||
- inet->freebind || inet->transparent;
+ test_bit(INET_FLAGS_FREEBIND, &inet->inet_flags) ||
+ test_bit(INET_FLAGS_TRANSPARENT, &inet->inet_flags);
}
/* Sysctl settings for net ipv6.auto_flowlabels */
@@ -1216,6 +1217,7 @@ void inet6_cleanup_sock(struct sock *sk);
void inet6_sock_destruct(struct sock *sk);
int inet6_release(struct socket *sock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index d2ea5863eedc..b2cf243ebe44 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -426,17 +426,10 @@ struct iw_public_data {
/**************************** PROTOTYPES ****************************/
/*
- * Functions part of the Wireless Extensions (defined in net/core/wireless.c).
- * Those may be called only within the kernel.
+ * Functions part of the Wireless Extensions (defined in net/wireless/wext-core.c).
+ * Those may be called by driver modules.
*/
-/* First : function strictly used inside the kernel */
-
-/* Handle /proc/net/wireless, called in net/code/dev.c */
-int dev_get_wireless_info(char *buffer, char **start, off_t offset, int length);
-
-/* Second : functions that may be called by driver modules */
-
/* Send a single event to user space */
void wireless_send_event(struct net_device *dev, unsigned int cmd,
union iwreq_data *wrqu, const char *extra);
diff --git a/include/net/llc_c_ac.h b/include/net/llc_c_ac.h
index 3e1f76786d7b..7620a9196922 100644
--- a/include/net/llc_c_ac.h
+++ b/include/net/llc_c_ac.h
@@ -175,7 +175,6 @@ int llc_conn_ac_send_ack_if_needed(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_adjust_npta_by_rr(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_adjust_npta_by_rnr(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_rst_sendack_flag(struct sock *sk, struct sk_buff *skb);
-int llc_conn_ac_send_i_rsp_as_ack(struct sock *sk, struct sk_buff *skb);
int llc_conn_ac_send_i_as_ack(struct sock *sk, struct sk_buff *skb);
void llc_conn_busy_tmr_cb(struct timer_list *t);
diff --git a/include/net/llc_c_ev.h b/include/net/llc_c_ev.h
index 3948cf111dd0..241889955157 100644
--- a/include/net/llc_c_ev.h
+++ b/include/net/llc_c_ev.h
@@ -158,7 +158,6 @@ int llc_conn_ev_p_tmr_exp(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_ack_tmr_exp(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_rej_tmr_exp(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_busy_tmr_exp(struct sock *sk, struct sk_buff *skb);
-int llc_conn_ev_sendack_tmr_exp(struct sock *sk, struct sk_buff *skb);
/* NOT_USED functions and their variations */
int llc_conn_ev_rx_xxx_cmd_pbit_set_1(struct sock *sk, struct sk_buff *skb);
int llc_conn_ev_rx_xxx_rsp_fbit_set_1(struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 6f15e6fa154e..53bd2d02a4f0 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -16,9 +16,12 @@
#define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1)
#define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2)
+/* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return
+ * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety.
+ */
enum {
LWTUNNEL_XMIT_DONE,
- LWTUNNEL_XMIT_CONTINUE,
+ LWTUNNEL_XMIT_CONTINUE = 0x100,
};
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3a8a2d2c58c3..7c707358d15c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1192,9 +1192,11 @@ struct ieee80211_tx_info {
u8 ampdu_ack_len;
u8 ampdu_len;
u8 antenna;
+ u8 pad;
u16 tx_time;
u8 flags;
- void *status_driver_data[18 / sizeof(void *)];
+ u8 pad2;
+ void *status_driver_data[16 / sizeof(void *)];
} status;
struct {
struct ieee80211_tx_rate driver_rates[
@@ -2259,6 +2261,7 @@ struct ieee80211_sta_aggregates {
* @he_cap: HE capabilities of this STA
* @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities
* @eht_cap: EHT capabilities of this STA
+ * @agg: per-link data for multi-link aggregation
* @bandwidth: current bandwidth the station can receive with
* @rx_nss: in HT/VHT, the maximum number of spatial streams the
* station can receive at the moment, changed by operating mode
@@ -6612,6 +6615,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
* marks frames marked in the bitmap as having been filtered. Afterwards, it
* checks if any frames in the window starting from @ssn can now be released
* (in case they were only waiting for frames that were filtered.)
+ * (Only work correctly if @max_rx_aggregation_subframes <= 64 frames)
*/
void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
u16 ssn, u64 filtered,
diff --git a/include/net/macsec.h b/include/net/macsec.h
index 441ed8fd4b5f..75a6f4863c83 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -312,6 +312,8 @@ static inline bool macsec_send_sci(const struct macsec_secy *secy)
return tx_sc->send_sci ||
(secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb);
}
+struct net_device *macsec_get_real_dev(const struct net_device *dev);
+bool macsec_netdev_is_offloaded(struct net_device *dev);
static inline void *macsec_netdev_priv(const struct net_device *dev)
{
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 96c120160f15..88b6ef7ce1a6 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -33,6 +33,7 @@ enum gdma_request_type {
GDMA_DESTROY_PD = 30,
GDMA_CREATE_MR = 31,
GDMA_DESTROY_MR = 32,
+ GDMA_QUERY_HWC_TIMEOUT = 84, /* 0x54 */
};
#define GDMA_RESOURCE_DOORBELL_PAGE 27
@@ -57,6 +58,8 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
GDMA_EQE_HWC_INIT_DATA = 130,
GDMA_EQE_HWC_INIT_DONE = 131,
+ GDMA_EQE_HWC_SOC_RECONFIG = 132,
+ GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
};
enum {
@@ -531,10 +534,12 @@ enum {
* so the driver is able to reliably support features like busy_poll.
*/
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
+#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
- GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX)
+ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
#define GDMA_DRV_CAP_FLAGS2 0
@@ -664,6 +669,19 @@ struct gdma_disable_queue_req {
u32 alloc_res_id_on_creation;
}; /* HW DATA */
+/* GDMA_QUERY_HWC_TIMEOUT */
+struct gdma_query_hwc_timeout_req {
+ struct gdma_req_hdr hdr;
+ u32 timeout_ms;
+ u32 reserved;
+};
+
+struct gdma_query_hwc_timeout_resp {
+ struct gdma_resp_hdr hdr;
+ u32 timeout_ms;
+ u32 reserved;
+};
+
enum atb_page_size {
ATB_PAGE_SIZE_4K,
ATB_PAGE_SIZE_8K,
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 6a757a6e2732..3d3b5c881bc1 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -23,6 +23,10 @@
#define HWC_INIT_DATA_PF_DEST_RQ_ID 10
#define HWC_INIT_DATA_PF_DEST_CQ_ID 11
+#define HWC_DATA_CFG_HWC_TIMEOUT 1
+
+#define HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS 30000
+
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
* them are naturally aligned and hence don't need __packed.
*/
@@ -182,6 +186,7 @@ struct hw_channel_context {
u32 pf_dest_vrq_id;
u32 pf_dest_vrcq_id;
+ u32 hwc_timeout;
struct hwc_caller_ctx *caller_ctx;
};
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 024ad8ddb27e..9f70b4332238 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -4,6 +4,8 @@
#ifndef _MANA_H
#define _MANA_H
+#include <net/xdp.h>
+
#include "gdma.h"
#include "hw_channel.h"
@@ -280,6 +282,7 @@ struct mana_recv_buf_oob {
struct gdma_wqe_request wqe_req;
void *buf_va;
+ bool from_pool; /* allocated from a page pool */
/* SGL of the buffer going to be sent has part of the work request. */
u32 num_sge;
@@ -330,6 +333,8 @@ struct mana_rxq {
bool xdp_flush;
int xdp_rc; /* XDP redirect return code */
+ struct page_pool *page_pool;
+
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
*/
@@ -347,6 +352,13 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
+ u64 hc_tx_bytes;
+ u64 hc_tx_ucast_pkts;
+ u64 hc_tx_ucast_bytes;
+ u64 hc_tx_bcast_pkts;
+ u64 hc_tx_bcast_bytes;
+ u64 hc_tx_mcast_pkts;
+ u64 hc_tx_mcast_bytes;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
u64 rx_coalesced_err;
@@ -437,6 +449,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
+void mana_query_gf_stats(struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
@@ -578,6 +591,49 @@ struct mana_fence_rq_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
+/* Query stats RQ */
+struct mana_query_gf_stat_req {
+ struct gdma_req_hdr hdr;
+ u64 req_stats;
+}; /* HW DATA */
+
+struct mana_query_gf_stat_resp {
+ struct gdma_resp_hdr hdr;
+ u64 reported_stats;
+ /* rx errors/discards */
+ u64 discard_rx_nowqe;
+ u64 err_rx_vport_disabled;
+ /* rx bytes/packets */
+ u64 hc_rx_bytes;
+ u64 hc_rx_ucast_pkts;
+ u64 hc_rx_ucast_bytes;
+ u64 hc_rx_bcast_pkts;
+ u64 hc_rx_bcast_bytes;
+ u64 hc_rx_mcast_pkts;
+ u64 hc_rx_mcast_bytes;
+ /* tx errors */
+ u64 err_tx_gf_disabled;
+ u64 err_tx_vport_disabled;
+ u64 err_tx_inval_vport_offset_pkt;
+ u64 err_tx_vlan_enforcement;
+ u64 err_tx_ethtype_enforcement;
+ u64 err_tx_SA_enforecement;
+ u64 err_tx_SQPDID_enforcement;
+ u64 err_tx_CQPDID_enforcement;
+ u64 err_tx_mtu_violation;
+ u64 err_tx_inval_oob;
+ /* tx bytes/packets */
+ u64 hc_tx_bytes;
+ u64 hc_tx_ucast_pkts;
+ u64 hc_tx_ucast_bytes;
+ u64 hc_tx_bcast_pkts;
+ u64 hc_tx_bcast_bytes;
+ u64 hc_tx_mcast_pkts;
+ u64 hc_tx_mcast_bytes;
+ /* tx error */
+ u64 err_tx_gdma;
+}; /* HW DATA */
+
/* Configure vPort Rx Steering */
struct mana_cfg_rx_steer_req_v2 {
struct gdma_req_hdr hdr;
@@ -657,6 +713,42 @@ struct mana_deregister_filter_resp {
struct gdma_resp_hdr hdr;
}; /* HW DATA */
+/* Requested GF stats Flags */
+/* Rx discards/Errors */
+#define STATISTICS_FLAGS_RX_DISCARDS_NO_WQE 0x0000000000000001
+#define STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED 0x0000000000000002
+/* Rx bytes/pkts */
+#define STATISTICS_FLAGS_HC_RX_BYTES 0x0000000000000004
+#define STATISTICS_FLAGS_HC_RX_UCAST_PACKETS 0x0000000000000008
+#define STATISTICS_FLAGS_HC_RX_UCAST_BYTES 0x0000000000000010
+#define STATISTICS_FLAGS_HC_RX_MCAST_PACKETS 0x0000000000000020
+#define STATISTICS_FLAGS_HC_RX_MCAST_BYTES 0x0000000000000040
+#define STATISTICS_FLAGS_HC_RX_BCAST_PACKETS 0x0000000000000080
+#define STATISTICS_FLAGS_HC_RX_BCAST_BYTES 0x0000000000000100
+/* Tx errors */
+#define STATISTICS_FLAGS_TX_ERRORS_GF_DISABLED 0x0000000000000200
+#define STATISTICS_FLAGS_TX_ERRORS_VPORT_DISABLED 0x0000000000000400
+#define STATISTICS_FLAGS_TX_ERRORS_INVAL_VPORT_OFFSET_PACKETS \
+ 0x0000000000000800
+#define STATISTICS_FLAGS_TX_ERRORS_VLAN_ENFORCEMENT 0x0000000000001000
+#define STATISTICS_FLAGS_TX_ERRORS_ETH_TYPE_ENFORCEMENT \
+ 0x0000000000002000
+#define STATISTICS_FLAGS_TX_ERRORS_SA_ENFORCEMENT 0x0000000000004000
+#define STATISTICS_FLAGS_TX_ERRORS_SQPDID_ENFORCEMENT 0x0000000000008000
+#define STATISTICS_FLAGS_TX_ERRORS_CQPDID_ENFORCEMENT 0x0000000000010000
+#define STATISTICS_FLAGS_TX_ERRORS_MTU_VIOLATION 0x0000000000020000
+#define STATISTICS_FLAGS_TX_ERRORS_INVALID_OOB 0x0000000000040000
+/* Tx bytes/pkts */
+#define STATISTICS_FLAGS_HC_TX_BYTES 0x0000000000080000
+#define STATISTICS_FLAGS_HC_TX_UCAST_PACKETS 0x0000000000100000
+#define STATISTICS_FLAGS_HC_TX_UCAST_BYTES 0x0000000000200000
+#define STATISTICS_FLAGS_HC_TX_MCAST_PACKETS 0x0000000000400000
+#define STATISTICS_FLAGS_HC_TX_MCAST_BYTES 0x0000000000800000
+#define STATISTICS_FLAGS_HC_TX_BCAST_PACKETS 0x0000000001000000
+#define STATISTICS_FLAGS_HC_TX_BCAST_BYTES 0x0000000002000000
+/* Tx error */
+#define STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR 0x0000000004000000
+
#define MANA_MAX_NUM_QUEUES 64
#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 3c5c68618fcc..fb996124b3d5 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -96,6 +96,27 @@ struct mptcp_out_options {
#endif
};
+#define MPTCP_SCHED_NAME_MAX 16
+#define MPTCP_SUBFLOWS_MAX 8
+
+struct mptcp_sched_data {
+ bool reinject;
+ u8 subflows;
+ struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
+};
+
+struct mptcp_sched_ops {
+ int (*get_subflow)(struct mptcp_sock *msk,
+ struct mptcp_sched_data *data);
+
+ char name[MPTCP_SCHED_NAME_MAX];
+ struct module *owner;
+ struct list_head list;
+
+ void (*init)(struct mptcp_sock *msk);
+ void (*release)(struct mptcp_sock *msk);
+} ____cacheline_aligned_in_smp;
+
#ifdef CONFIG_MPTCP
void mptcp_init(void);
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 52eae0943433..9bbdf6eaa942 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -488,9 +488,6 @@ void igmp6_event_report(struct sk_buff *skb);
#ifdef CONFIG_SYSCTL
int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-int ndisc_ifinfo_sysctl_strategy(struct ctl_table *ctl,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen);
#endif
void inet6_ifinfo_notify(int event, struct inet6_dev *idev);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index f6a8ecc6b1fa..6da68886fabb 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -394,8 +394,6 @@ void neigh_for_each(struct neigh_table *tbl,
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *));
int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *);
-void pneigh_for_each(struct neigh_table *tbl,
- void (*cb)(struct pneigh_entry *));
struct neigh_seq_state {
struct seq_net_private p;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 78beaa765c73..9f6add96de2d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -42,6 +42,7 @@
#include <linux/idr.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
+#include <linux/xarray.h>
struct user_namespace;
struct proc_dir_entry;
@@ -69,7 +70,7 @@ struct net {
atomic_t dev_unreg_count;
unsigned int dev_base_seq; /* protected by rtnl_mutex */
- int ifindex;
+ u32 ifindex;
spinlock_t nsid_lock;
atomic_t fnhe_genid;
@@ -110,6 +111,7 @@ struct net {
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
+ struct xarray dev_by_index;
struct raw_notifier_head netdev_chain;
/* Note that @hash_mix can be read millions times per second,
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
new file mode 100644
index 000000000000..cdcafb30d437
--- /dev/null
+++ b/include/net/netdev_rx_queue.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NETDEV_RX_QUEUE_H
+#define _LINUX_NETDEV_RX_QUEUE_H
+
+#include <linux/kobject.h>
+#include <linux/netdevice.h>
+#include <linux/sysfs.h>
+#include <net/xdp.h>
+
+/* This structure contains an instance of an RX queue. */
+struct netdev_rx_queue {
+ struct xdp_rxq_info xdp_rxq;
+#ifdef CONFIG_RPS
+ struct rps_map __rcu *rps_map;
+ struct rps_dev_flow_table __rcu *rps_flow_table;
+#endif
+ struct kobject kobj;
+ struct net_device *dev;
+ netdevice_tracker dev_tracker;
+
+#ifdef CONFIG_XDP_SOCKETS
+ struct xsk_buff_pool *pool;
+#endif
+} ____cacheline_aligned_in_smp;
+
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
+ ssize_t (*store)(struct netdev_rx_queue *queue,
+ const char *buf, size_t len);
+};
+
+static inline struct netdev_rx_queue *
+__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
+{
+ return dev->_rx + rxq;
+}
+
+#ifdef CONFIG_SYSFS
+static inline unsigned int
+get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ int index = queue - dev->_rx;
+
+ BUG_ON(index >= dev->num_rx_queues);
+ return index;
+}
+#endif
+#endif
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a72028dbef0c..4085765c3370 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -190,10 +190,6 @@ static inline void nf_ct_put(struct nf_conn *ct)
nf_ct_destroy(&ct->ct_general);
}
-/* Protocol module loading */
-int nf_ct_l3proto_try_module_get(unsigned short l3proto);
-void nf_ct_l3proto_module_put(unsigned short l3proto);
-
/* load module; enable/disable conntrack in this namespace */
int nf_ct_netns_get(struct net *net, u8 nfproto);
void nf_ct_netns_put(struct net *net, u8 nfproto);
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 4b2b7f8914ea..a120685cac93 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -78,6 +78,4 @@ static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
void nf_conntrack_acct_pernet_init(struct net *net);
-void nf_conntrack_acct_fini(void);
-
#endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index cf0d81be5a96..165e7a03b8e9 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -100,7 +100,7 @@ nf_ct_expect_find_get(struct net *net,
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple);
+ const struct nf_conntrack_tuple *tuple, bool unlink);
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report);
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index f30b1694b690..de2f956abf34 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -136,8 +136,6 @@ static inline void *nfct_help_data(const struct nf_conn *ct)
return (void *)help->data;
}
-void nf_conntrack_helper_pernet_init(struct net *net);
-
int nf_conntrack_helper_init(void);
void nf_conntrack_helper_fini(void);
@@ -182,5 +180,4 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat);
int nf_nat_helper_try_module_get(const char *name, u16 l3num,
u8 protonum);
void nf_nat_helper_put(struct nf_conntrack_helper *helper);
-void nf_ct_set_auto_assign_helper_warned(struct net *net);
#endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index 66bab6c60d12..fcb19a4e8f2b 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -52,7 +52,6 @@ int nf_connlabels_replace(struct nf_conn *ct,
const u32 *data, const u32 *mask, unsigned int words);
#ifdef CONFIG_NF_CONNTRACK_LABELS
-int nf_conntrack_labels_init(void);
int nf_connlabels_get(struct net *net, unsigned int bit);
void nf_connlabels_put(struct net *net);
#else
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 640441a2f926..dd40c75011d2 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -512,6 +512,7 @@ struct nft_set_elem_expr {
*
* @list: table set list node
* @bindings: list of set bindings
+ * @refs: internal refcounting for async set destruction
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set
@@ -533,6 +534,7 @@ struct nft_set_elem_expr {
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
+ * @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
@@ -541,6 +543,7 @@ struct nft_set_elem_expr {
struct nft_set {
struct list_head list;
struct list_head bindings;
+ refcount_t refs;
struct nft_table *table;
possible_net_t net;
char *name;
@@ -562,7 +565,8 @@ struct nft_set {
struct list_head pending_update;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- u16 flags:14,
+ u16 flags:13,
+ dead:1,
genmask:2;
u8 klen;
u8 dlen;
@@ -583,6 +587,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
return (void *)set->data;
}
+static inline bool nft_set_gc_is_pending(const struct nft_set *s)
+{
+ return refcount_read(&s->refs) != 1;
+}
+
static inline struct nft_set *nft_set_container_of(const void *priv)
{
return (void *)priv - offsetof(struct nft_set, data);
@@ -596,7 +605,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
@@ -813,62 +821,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem);
-/**
- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- * @rcu: rcu head
- * @set: set the elements belong to
- * @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
- struct rcu_head rcu;
- const struct nft_set *set;
- unsigned int cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
- sizeof(struct nft_set_gc_batch_head)) / \
- sizeof(void *))
-
-/**
- * struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- * @head: GC batch head
- * @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
- struct nft_set_gc_batch_head head;
- void *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
- if (gcb != NULL)
- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
- gfp_t gfp)
-{
- if (gcb != NULL) {
- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
- return gcb;
- nft_set_gc_batch_complete(gcb);
- }
- return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
- void *elem)
-{
- gcb->elems[gcb->head.cnt++] = elem;
-}
-
struct nft_expr_ops;
/**
* struct nft_expr_type - nf_tables expression type
@@ -1557,39 +1509,30 @@ static inline void nft_set_elem_change_active(const struct net *net,
#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
#if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT 2
+#define NFT_SET_ELEM_DEAD_BIT 2
#elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ set_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
- clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
/**
@@ -1732,6 +1675,38 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable_flags(trans) \
(((struct nft_trans_flowtable *)trans->data)->flags)
+#define NFT_TRANS_GC_BATCHCOUNT 256
+
+struct nft_trans_gc {
+ struct list_head list;
+ struct net *net;
+ struct nft_set *set;
+ u32 seq;
+ u8 count;
+ void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct rcu_head rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1758,6 +1733,8 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
+ unsigned int gc_seq;
+ u8 validate_state;
};
extern unsigned int nf_tables_net_id;
diff --git a/include/net/netlink.h b/include/net/netlink.h
index b12cd957abb4..8a7cd1170e1f 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -375,12 +375,11 @@ struct nla_policy {
#define NLA_POLICY_BITFIELD32(valid) \
{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
-#define __NLA_IS_UINT_TYPE(tp) \
- (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || tp == NLA_U64)
+#define __NLA_IS_UINT_TYPE(tp) \
+ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \
+ tp == NLA_U64 || tp == NLA_BE16 || tp == NLA_BE32)
#define __NLA_IS_SINT_TYPE(tp) \
(tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
-#define __NLA_IS_BEINT_TYPE(tp) \
- (tp == NLA_BE16 || tp == NLA_BE32)
#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
#define NLA_ENSURE_UINT_TYPE(tp) \
@@ -394,7 +393,6 @@ struct nla_policy {
#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \
(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \
__NLA_IS_SINT_TYPE(tp) || \
- __NLA_IS_BEINT_TYPE(tp) || \
tp == NLA_MSECS || \
tp == NLA_BINARY) + tp)
#define NLA_ENSURE_NO_VALIDATION_PTR(tp) \
@@ -402,8 +400,6 @@ struct nla_policy {
tp != NLA_REJECT && \
tp != NLA_NESTED && \
tp != NLA_NESTED_ARRAY) + tp)
-#define NLA_ENSURE_BEINT_TYPE(tp) \
- (__NLA_ENSURE(__NLA_IS_BEINT_TYPE(tp)) + tp)
#define NLA_POLICY_RANGE(tp, _min, _max) { \
.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index f00374718159..7a41c4791536 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -152,7 +152,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_abort_on_overflow;
u8 sysctl_tcp_fack; /* obsolete */
int sysctl_tcp_max_reordering;
- int sysctl_tcp_adv_win_scale;
+ int sysctl_tcp_adv_win_scale; /* obsolete */
u8 sysctl_tcp_dsack;
u8 sysctl_tcp_app_win;
u8 sysctl_tcp_frto;
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 8c77832d0240..cc8060c017d5 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -2,8 +2,6 @@
#ifndef _NETNS_NFTABLES_H_
#define _NETNS_NFTABLES_H_
-#include <linux/list.h>
-
struct netns_nftables {
u8 gencursor;
};
diff --git a/include/net/p8022.h b/include/net/p8022.h
index b690ffcad66b..a29e224ac498 100644
--- a/include/net/p8022.h
+++ b/include/net/p8022.h
@@ -13,7 +13,4 @@ register_8022_client(unsigned char type,
struct packet_type *pt,
struct net_device *orig_dev));
void unregister_8022_client(struct datalink_proto *proto);
-
-struct datalink_proto *make_8023_client(void);
-void destroy_8023_client(struct datalink_proto *dl);
#endif
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
deleted file mode 100644
index 126f9e294389..000000000000
--- a/include/net/page_pool.h
+++ /dev/null
@@ -1,402 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * page_pool.h
- * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
- * Copyright (C) 2016 Red Hat, Inc.
- */
-
-/**
- * DOC: page_pool allocator
- *
- * This page_pool allocator is optimized for the XDP mode that
- * uses one-frame-per-page, but have fallbacks that act like the
- * regular page allocator APIs.
- *
- * Basic use involve replacing alloc_pages() calls with the
- * page_pool_alloc_pages() call. Drivers should likely use
- * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
- *
- * API keeps track of in-flight pages, in-order to let API user know
- * when it is safe to dealloactor page_pool object. Thus, API users
- * must make sure to call page_pool_release_page() when a page is
- * "leaving" the page_pool. Or call page_pool_put_page() where
- * appropiate. For maintaining correct accounting.
- *
- * API user must only call page_pool_put_page() once on a page, as it
- * will either recycle the page, or in case of elevated refcnt, it
- * will release the DMA mapping and in-flight state accounting. We
- * hope to lift this requirement in the future.
- */
-#ifndef _NET_PAGE_POOL_H
-#define _NET_PAGE_POOL_H
-
-#include <linux/mm.h> /* Needed by ptr_ring */
-#include <linux/ptr_ring.h>
-#include <linux/dma-direction.h>
-
-#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
- * map/unmap
- */
-#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets
- * from page_pool will be
- * DMA-synced-for-device according to
- * the length provided by the device
- * driver.
- * Please note DMA-sync-for-CPU is still
- * device driver responsibility
- */
-#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
- PP_FLAG_DMA_SYNC_DEV |\
- PP_FLAG_PAGE_FRAG)
-
-/*
- * Fast allocation side cache array/stack
- *
- * The cache size and refill watermark is related to the network
- * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
- * ring is usually refilled and the max consumed elements will be 64,
- * thus a natural max size of objects needed in the cache.
- *
- * Keeping room for more objects, is due to XDP_DROP use-case. As
- * XDP_DROP allows the opportunity to recycle objects directly into
- * this array, as it shares the same softirq/NAPI protection. If
- * cache is already full (or partly full) then the XDP_DROP recycles
- * would have to take a slower code path.
- */
-#define PP_ALLOC_CACHE_SIZE 128
-#define PP_ALLOC_CACHE_REFILL 64
-struct pp_alloc_cache {
- u32 count;
- struct page *cache[PP_ALLOC_CACHE_SIZE];
-};
-
-struct page_pool_params {
- unsigned int flags;
- unsigned int order;
- unsigned int pool_size;
- int nid; /* Numa node id to allocate from pages from */
- struct device *dev; /* device, for DMA pre-mapping purposes */
- struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */
- enum dma_data_direction dma_dir; /* DMA mapping direction */
- unsigned int max_len; /* max DMA sync memory size */
- unsigned int offset; /* DMA addr offset */
- void (*init_callback)(struct page *page, void *arg);
- void *init_arg;
-};
-
-#ifdef CONFIG_PAGE_POOL_STATS
-struct page_pool_alloc_stats {
- u64 fast; /* fast path allocations */
- u64 slow; /* slow-path order 0 allocations */
- u64 slow_high_order; /* slow-path high order allocations */
- u64 empty; /* failed refills due to empty ptr ring, forcing
- * slow path allocation
- */
- u64 refill; /* allocations via successful refill */
- u64 waive; /* failed refills due to numa zone mismatch */
-};
-
-struct page_pool_recycle_stats {
- u64 cached; /* recycling placed page in the cache. */
- u64 cache_full; /* cache was full */
- u64 ring; /* recycling placed page back into ptr ring */
- u64 ring_full; /* page was released from page-pool because
- * PTR ring was full.
- */
- u64 released_refcnt; /* page released because of elevated
- * refcnt
- */
-};
-
-/* This struct wraps the above stats structs so users of the
- * page_pool_get_stats API can pass a single argument when requesting the
- * stats for the page pool.
- */
-struct page_pool_stats {
- struct page_pool_alloc_stats alloc_stats;
- struct page_pool_recycle_stats recycle_stats;
-};
-
-int page_pool_ethtool_stats_get_count(void);
-u8 *page_pool_ethtool_stats_get_strings(u8 *data);
-u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
-
-/*
- * Drivers that wish to harvest page pool stats and report them to users
- * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
- * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
- */
-bool page_pool_get_stats(struct page_pool *pool,
- struct page_pool_stats *stats);
-#else
-
-static inline int page_pool_ethtool_stats_get_count(void)
-{
- return 0;
-}
-
-static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
-{
- return data;
-}
-
-static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
-{
- return data;
-}
-
-#endif
-
-struct page_pool {
- struct page_pool_params p;
-
- struct delayed_work release_dw;
- void (*disconnect)(void *);
- unsigned long defer_start;
- unsigned long defer_warn;
-
- u32 pages_state_hold_cnt;
- unsigned int frag_offset;
- struct page *frag_page;
- long frag_users;
-
-#ifdef CONFIG_PAGE_POOL_STATS
- /* these stats are incremented while in softirq context */
- struct page_pool_alloc_stats alloc_stats;
-#endif
- u32 xdp_mem_id;
-
- /*
- * Data structure for allocation side
- *
- * Drivers allocation side usually already perform some kind
- * of resource protection. Piggyback on this protection, and
- * require driver to protect allocation side.
- *
- * For NIC drivers this means, allocate a page_pool per
- * RX-queue. As the RX-queue is already protected by
- * Softirq/BH scheduling and napi_schedule. NAPI schedule
- * guarantee that a single napi_struct will only be scheduled
- * on a single CPU (see napi_schedule).
- */
- struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
-
- /* Data structure for storing recycled pages.
- *
- * Returning/freeing pages is more complicated synchronization
- * wise, because free's can happen on remote CPUs, with no
- * association with allocation resource.
- *
- * Use ptr_ring, as it separates consumer and producer
- * effeciently, it a way that doesn't bounce cache-lines.
- *
- * TODO: Implement bulk return pages into this structure.
- */
- struct ptr_ring ring;
-
-#ifdef CONFIG_PAGE_POOL_STATS
- /* recycle stats are per-cpu to avoid locking */
- struct page_pool_recycle_stats __percpu *recycle_stats;
-#endif
- atomic_t pages_state_release_cnt;
-
- /* A page_pool is strictly tied to a single RX-queue being
- * protected by NAPI, due to above pp_alloc_cache. This
- * refcnt serves purpose is to simplify drivers error handling.
- */
- refcount_t user_cnt;
-
- u64 destroy_cnt;
-};
-
-struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
-
-static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
-{
- gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
-
- return page_pool_alloc_pages(pool, gfp);
-}
-
-struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
- unsigned int size, gfp_t gfp);
-
-static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
- unsigned int *offset,
- unsigned int size)
-{
- gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
-
- return page_pool_alloc_frag(pool, offset, size, gfp);
-}
-
-/* get the stored dma direction. A driver might decide to treat this locally and
- * avoid the extra cache line from page_pool to determine the direction
- */
-static
-inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
-{
- return pool->p.dma_dir;
-}
-
-bool page_pool_return_skb_page(struct page *page, bool napi_safe);
-
-struct page_pool *page_pool_create(const struct page_pool_params *params);
-
-struct xdp_mem_info;
-
-#ifdef CONFIG_PAGE_POOL
-void page_pool_unlink_napi(struct page_pool *pool);
-void page_pool_destroy(struct page_pool *pool);
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
- struct xdp_mem_info *mem);
-void page_pool_release_page(struct page_pool *pool, struct page *page);
-void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count);
-#else
-static inline void page_pool_unlink_napi(struct page_pool *pool)
-{
-}
-
-static inline void page_pool_destroy(struct page_pool *pool)
-{
-}
-
-static inline void page_pool_use_xdp_mem(struct page_pool *pool,
- void (*disconnect)(void *),
- struct xdp_mem_info *mem)
-{
-}
-static inline void page_pool_release_page(struct page_pool *pool,
- struct page *page)
-{
-}
-
-static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
- int count)
-{
-}
-#endif
-
-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size,
- bool allow_direct);
-
-/* pp_frag_count represents the number of writers who can update the page
- * either by updating skb->data or via DMA mappings for the device.
- * We can't rely on the page refcnt for that as we don't know who might be
- * holding page references and we can't reliably destroy or sync DMA mappings
- * of the fragments.
- *
- * When pp_frag_count reaches 0 we can either recycle the page if the page
- * refcnt is 1 or return it back to the memory allocator and destroy any
- * mappings we have.
- */
-static inline void page_pool_fragment_page(struct page *page, long nr)
-{
- atomic_long_set(&page->pp_frag_count, nr);
-}
-
-static inline long page_pool_defrag_page(struct page *page, long nr)
-{
- long ret;
-
- /* If nr == pp_frag_count then we have cleared all remaining
- * references to the page. No need to actually overwrite it, instead
- * we can leave this to be overwritten by the calling function.
- *
- * The main advantage to doing this is that an atomic_read is
- * generally a much cheaper operation than an atomic update,
- * especially when dealing with a page that may be partitioned
- * into only 2 or 3 pieces.
- */
- if (atomic_long_read(&page->pp_frag_count) == nr)
- return 0;
-
- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
- WARN_ON(ret < 0);
- return ret;
-}
-
-static inline bool page_pool_is_last_frag(struct page_pool *pool,
- struct page *page)
-{
- /* If fragments aren't enabled or count is 0 we were the last user */
- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
- (page_pool_defrag_page(page, 1) == 0);
-}
-
-static inline void page_pool_put_page(struct page_pool *pool,
- struct page *page,
- unsigned int dma_sync_size,
- bool allow_direct)
-{
- /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
- * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
- */
-#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(pool, page))
- return;
-
- page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
-#endif
-}
-
-/* Same as above but will try to sync the entire area pool->max_len */
-static inline void page_pool_put_full_page(struct page_pool *pool,
- struct page *page, bool allow_direct)
-{
- page_pool_put_page(pool, page, -1, allow_direct);
-}
-
-/* Same as above but the caller must guarantee safe context. e.g NAPI */
-static inline void page_pool_recycle_direct(struct page_pool *pool,
- struct page *page)
-{
- page_pool_put_full_page(pool, page, true);
-}
-
-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
- (sizeof(dma_addr_t) > sizeof(unsigned long))
-
-static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
-{
- dma_addr_t ret = page->dma_addr;
-
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
-
- return ret;
-}
-
-static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
-{
- page->dma_addr = addr;
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- page->dma_addr_upper = upper_32_bits(addr);
-}
-
-static inline bool is_page_pool_compiled_in(void)
-{
-#ifdef CONFIG_PAGE_POOL
- return true;
-#else
- return false;
-#endif
-}
-
-static inline bool page_pool_put(struct page_pool *pool)
-{
- return refcount_dec_and_test(&pool->user_cnt);
-}
-
-/* Caller must provide appropriate safe context, e.g. NAPI. */
-void page_pool_update_nid(struct page_pool *pool, int new_nid);
-static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
-{
- if (unlikely(pool->p.nid != new_nid))
- page_pool_update_nid(pool, new_nid);
-}
-
-#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
new file mode 100644
index 000000000000..94231533a369
--- /dev/null
+++ b/include/net/page_pool/helpers.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * page_pool/helpers.h
+ * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
+ * Copyright (C) 2016 Red Hat, Inc.
+ */
+
+/**
+ * DOC: page_pool allocator
+ *
+ * The page_pool allocator is optimized for the XDP mode that
+ * uses one frame per-page, but it can fallback on the
+ * regular page allocator APIs.
+ *
+ * Basic use involves replacing alloc_pages() calls with the
+ * page_pool_alloc_pages() call. Drivers should use
+ * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
+ *
+ * API keeps track of in-flight pages, in order to let API user know
+ * when it is safe to free a page_pool object. Thus, API users
+ * must call page_pool_put_page() to free the page, or attach
+ * the page to a page_pool-aware objects like skbs marked with
+ * skb_mark_for_recycle().
+ *
+ * API user must call page_pool_put_page() once on a page, as it
+ * will either recycle the page, or in case of refcnt > 1, it will
+ * release the DMA mapping and in-flight state accounting.
+ */
+#ifndef _NET_PAGE_POOL_HELPERS_H
+#define _NET_PAGE_POOL_HELPERS_H
+
+#include <net/page_pool/types.h>
+
+#ifdef CONFIG_PAGE_POOL_STATS
+int page_pool_ethtool_stats_get_count(void);
+u8 *page_pool_ethtool_stats_get_strings(u8 *data);
+u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
+
+/*
+ * Drivers that wish to harvest page pool stats and report them to users
+ * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
+ * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
+ */
+bool page_pool_get_stats(struct page_pool *pool,
+ struct page_pool_stats *stats);
+#else
+static inline int page_pool_ethtool_stats_get_count(void)
+{
+ return 0;
+}
+
+static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
+{
+ return data;
+}
+
+static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+{
+ return data;
+}
+#endif
+
+/**
+ * page_pool_dev_alloc_pages() - allocate a page.
+ * @pool: pool from which to allocate
+ *
+ * Get a page from the page allocator or page_pool caches.
+ */
+static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_pages(pool, gfp);
+}
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
+/**
+ * page_pool_get_dma_dir() - Retrieve the stored DMA direction.
+ * @pool: pool from which page was allocated
+ *
+ * Get the stored dma direction. A driver might decide to store this locally
+ * and avoid the extra cache line from page_pool to determine the direction.
+ */
+static
+inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
+{
+ return pool->p.dma_dir;
+}
+
+/* pp_frag_count represents the number of writers who can update the page
+ * either by updating skb->data or via DMA mappings for the device.
+ * We can't rely on the page refcnt for that as we don't know who might be
+ * holding page references and we can't reliably destroy or sync DMA mappings
+ * of the fragments.
+ *
+ * When pp_frag_count reaches 0 we can either recycle the page if the page
+ * refcnt is 1 or return it back to the memory allocator and destroy any
+ * mappings we have.
+ */
+static inline void page_pool_fragment_page(struct page *page, long nr)
+{
+ atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_defrag_page(struct page *page, long nr)
+{
+ long ret;
+
+ /* If nr == pp_frag_count then we have cleared all remaining
+ * references to the page. No need to actually overwrite it, instead
+ * we can leave this to be overwritten by the calling function.
+ *
+ * The main advantage to doing this is that an atomic_read is
+ * generally a much cheaper operation than an atomic update,
+ * especially when dealing with a page that may be partitioned
+ * into only 2 or 3 pieces.
+ */
+ if (atomic_long_read(&page->pp_frag_count) == nr)
+ return 0;
+
+ ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ WARN_ON(ret < 0);
+ return ret;
+}
+
+static inline bool page_pool_is_last_frag(struct page_pool *pool,
+ struct page *page)
+{
+ /* If fragments aren't enabled or count is 0 we were the last user */
+ return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ (page_pool_defrag_page(page, 1) == 0);
+}
+
+/**
+ * page_pool_put_page() - release a reference to a page pool page
+ * @pool: pool from which page was allocated
+ * @page: page to release a reference on
+ * @dma_sync_size: how much of the page may have been touched by the device
+ * @allow_direct: released by the consumer, allow lockless caching
+ *
+ * The outcome of this depends on the page refcnt. If the driver bumps
+ * the refcnt > 1 this will unmap the page. If the page refcnt is 1
+ * the allocator owns the page and will try to recycle it in one of the pool
+ * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device
+ * using dma_sync_single_range_for_device().
+ */
+static inline void page_pool_put_page(struct page_pool *pool,
+ struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+#ifdef CONFIG_PAGE_POOL
+ if (!page_pool_is_last_frag(pool, page))
+ return;
+
+ page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
+#endif
+}
+
+/**
+ * page_pool_put_full_page() - release a reference on a page pool page
+ * @pool: pool from which page was allocated
+ * @page: page to release a reference on
+ * @allow_direct: released by the consumer, allow lockless caching
+ *
+ * Similar to page_pool_put_page(), but will DMA sync the entire memory area
+ * as configured in &page_pool_params.max_len.
+ */
+static inline void page_pool_put_full_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
+{
+ page_pool_put_page(pool, page, -1, allow_direct);
+}
+
+/**
+ * page_pool_recycle_direct() - release a reference on a page pool page
+ * @pool: pool from which page was allocated
+ * @page: page to release a reference on
+ *
+ * Similar to page_pool_put_full_page() but caller must guarantee safe context
+ * (e.g NAPI), since it will recycle the page directly into the pool fast cache.
+ */
+static inline void page_pool_recycle_direct(struct page_pool *pool,
+ struct page *page)
+{
+ page_pool_put_full_page(pool, page, true);
+}
+
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+ (sizeof(dma_addr_t) > sizeof(unsigned long))
+
+/**
+ * page_pool_get_dma_addr() - Retrieve the stored DMA address.
+ * @page: page allocated from a page pool
+ *
+ * Fetch the DMA address of the page. The page pool to which the page belongs
+ * must had been created with PP_FLAG_DMA_MAP.
+ */
+static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+{
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
+ return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ page->dma_addr = addr;
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline bool page_pool_put(struct page_pool *pool)
+{
+ return refcount_dec_and_test(&pool->user_cnt);
+}
+
+static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
+{
+ if (unlikely(pool->p.nid != new_nid))
+ page_pool_update_nid(pool, new_nid);
+}
+
+#endif /* _NET_PAGE_POOL_HELPERS_H */
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
new file mode 100644
index 000000000000..887e7946a597
--- /dev/null
+++ b/include/net/page_pool/types.h
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NET_PAGE_POOL_TYPES_H
+#define _NET_PAGE_POOL_TYPES_H
+
+#include <linux/dma-direction.h>
+#include <linux/ptr_ring.h>
+
+#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
+ * map/unmap
+ */
+#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets
+ * from page_pool will be
+ * DMA-synced-for-device according to
+ * the length provided by the device
+ * driver.
+ * Please note DMA-sync-for-CPU is still
+ * device driver responsibility
+ */
+#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
+ PP_FLAG_DMA_SYNC_DEV |\
+ PP_FLAG_PAGE_FRAG)
+
+/*
+ * Fast allocation side cache array/stack
+ *
+ * The cache size and refill watermark is related to the network
+ * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
+ * ring is usually refilled and the max consumed elements will be 64,
+ * thus a natural max size of objects needed in the cache.
+ *
+ * Keeping room for more objects, is due to XDP_DROP use-case. As
+ * XDP_DROP allows the opportunity to recycle objects directly into
+ * this array, as it shares the same softirq/NAPI protection. If
+ * cache is already full (or partly full) then the XDP_DROP recycles
+ * would have to take a slower code path.
+ */
+#define PP_ALLOC_CACHE_SIZE 128
+#define PP_ALLOC_CACHE_REFILL 64
+struct pp_alloc_cache {
+ u32 count;
+ struct page *cache[PP_ALLOC_CACHE_SIZE];
+};
+
+/**
+ * struct page_pool_params - page pool parameters
+ * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG
+ * @order: 2^order pages on allocation
+ * @pool_size: size of the ptr_ring
+ * @nid: NUMA node id to allocate from pages from
+ * @dev: device, for DMA pre-mapping purposes
+ * @napi: NAPI which is the sole consumer of pages, otherwise NULL
+ * @dma_dir: DMA mapping direction
+ * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
+ * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
+ */
+struct page_pool_params {
+ unsigned int flags;
+ unsigned int order;
+ unsigned int pool_size;
+ int nid;
+ struct device *dev;
+ struct napi_struct *napi;
+ enum dma_data_direction dma_dir;
+ unsigned int max_len;
+ unsigned int offset;
+/* private: used by test code only */
+ void (*init_callback)(struct page *page, void *arg);
+ void *init_arg;
+};
+
+#ifdef CONFIG_PAGE_POOL_STATS
+/**
+ * struct page_pool_alloc_stats - allocation statistics
+ * @fast: successful fast path allocations
+ * @slow: slow path order-0 allocations
+ * @slow_high_order: slow path high order allocations
+ * @empty: ptr ring is empty, so a slow path allocation was forced
+ * @refill: an allocation which triggered a refill of the cache
+ * @waive: pages obtained from the ptr ring that cannot be added to
+ * the cache due to a NUMA mismatch
+ */
+struct page_pool_alloc_stats {
+ u64 fast;
+ u64 slow;
+ u64 slow_high_order;
+ u64 empty;
+ u64 refill;
+ u64 waive;
+};
+
+/**
+ * struct page_pool_recycle_stats - recycling (freeing) statistics
+ * @cached: recycling placed page in the page pool cache
+ * @cache_full: page pool cache was full
+ * @ring: page placed into the ptr ring
+ * @ring_full: page released from page pool because the ptr ring was full
+ * @released_refcnt: page released (and not recycled) because refcnt > 1
+ */
+struct page_pool_recycle_stats {
+ u64 cached;
+ u64 cache_full;
+ u64 ring;
+ u64 ring_full;
+ u64 released_refcnt;
+};
+
+/**
+ * struct page_pool_stats - combined page pool use statistics
+ * @alloc_stats: see struct page_pool_alloc_stats
+ * @recycle_stats: see struct page_pool_recycle_stats
+ *
+ * Wrapper struct for combining page pool stats with different storage
+ * requirements.
+ */
+struct page_pool_stats {
+ struct page_pool_alloc_stats alloc_stats;
+ struct page_pool_recycle_stats recycle_stats;
+};
+#endif
+
+struct page_pool {
+ struct page_pool_params p;
+
+ long frag_users;
+ struct page *frag_page;
+ unsigned int frag_offset;
+ u32 pages_state_hold_cnt;
+
+ struct delayed_work release_dw;
+ void (*disconnect)(void *pool);
+ unsigned long defer_start;
+ unsigned long defer_warn;
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ /* these stats are incremented while in softirq context */
+ struct page_pool_alloc_stats alloc_stats;
+#endif
+ u32 xdp_mem_id;
+
+ /*
+ * Data structure for allocation side
+ *
+ * Drivers allocation side usually already perform some kind
+ * of resource protection. Piggyback on this protection, and
+ * require driver to protect allocation side.
+ *
+ * For NIC drivers this means, allocate a page_pool per
+ * RX-queue. As the RX-queue is already protected by
+ * Softirq/BH scheduling and napi_schedule. NAPI schedule
+ * guarantee that a single napi_struct will only be scheduled
+ * on a single CPU (see napi_schedule).
+ */
+ struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
+
+ /* Data structure for storing recycled pages.
+ *
+ * Returning/freeing pages is more complicated synchronization
+ * wise, because free's can happen on remote CPUs, with no
+ * association with allocation resource.
+ *
+ * Use ptr_ring, as it separates consumer and producer
+ * efficiently, it a way that doesn't bounce cache-lines.
+ *
+ * TODO: Implement bulk return pages into this structure.
+ */
+ struct ptr_ring ring;
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ /* recycle stats are per-cpu to avoid locking */
+ struct page_pool_recycle_stats __percpu *recycle_stats;
+#endif
+ atomic_t pages_state_release_cnt;
+
+ /* A page_pool is strictly tied to a single RX-queue being
+ * protected by NAPI, due to above pp_alloc_cache. This
+ * refcnt serves purpose is to simplify drivers error handling.
+ */
+ refcount_t user_cnt;
+
+ u64 destroy_cnt;
+};
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+ unsigned int size, gfp_t gfp);
+struct page_pool *page_pool_create(const struct page_pool_params *params);
+
+struct xdp_mem_info;
+
+#ifdef CONFIG_PAGE_POOL
+void page_pool_unlink_napi(struct page_pool *pool);
+void page_pool_destroy(struct page_pool *pool);
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+ struct xdp_mem_info *mem);
+void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ int count);
+#else
+static inline void page_pool_unlink_napi(struct page_pool *pool)
+{
+}
+
+static inline void page_pool_destroy(struct page_pool *pool)
+{
+}
+
+static inline void page_pool_use_xdp_mem(struct page_pool *pool,
+ void (*disconnect)(void *),
+ struct xdp_mem_info *mem)
+{
+}
+
+static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+ int count)
+{
+}
+#endif
+
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct);
+
+static inline bool is_page_pool_compiled_in(void)
+{
+#ifdef CONFIG_PAGE_POOL
+ return true;
+#else
+ return false;
+#endif
+}
+
+/* Caller must provide appropriate safe context, e.g. NAPI. */
+void page_pool_update_nid(struct page_pool *pool, int new_nid);
+
+#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a2ea45c7b53e..f308e8268651 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -138,19 +138,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
return NULL;
}
-static inline
-int tc_setup_cb_block_register(struct tcf_block *block, flow_setup_cb_t *cb,
- void *cb_priv)
-{
- return 0;
-}
-
-static inline
-void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
- void *cb_priv)
-{
-}
-
static inline int tcf_classify(struct sk_buff *skb,
const struct tcf_block *block,
const struct tcf_proto *tp,
@@ -866,6 +853,7 @@ struct tc_htb_qopt_offload {
u32 parent_classid;
u16 classid;
u16 qid;
+ u32 quantum;
u64 rate;
u64 ceil;
u8 prio;
diff --git a/include/net/route.h b/include/net/route.h
index 5a5c726472bd..51a45b1887b5 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -163,12 +163,12 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
}
static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4,
- struct sock *sk,
+ const struct sock *sk,
__be32 daddr, __be32 saddr,
__be16 dport, __be16 sport,
__u8 proto, __u8 tos, int oif)
{
- flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
+ flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
@@ -298,10 +298,10 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
{
__u8 flow_flags = 0;
- if (inet_sk(sk)->transparent)
+ if (inet_test_bit(TRANSPARENT, sk))
flow_flags |= FLOWI_FLAG_ANYSRC;
- flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
+ flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), protocol, flow_flags, dst,
src, dport, sport, sk->sk_uid);
}
@@ -309,7 +309,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
__be32 src, int oif, u8 protocol,
__be16 sport, __be16 dport,
- struct sock *sk)
+ const struct sock *sk)
{
struct net *net = sock_net(sk);
struct rtable *rt;
@@ -330,7 +330,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst,
static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
__be16 orig_sport, __be16 orig_dport,
__be16 sport, __be16 dport,
- struct sock *sk)
+ const struct sock *sk)
{
if (sport != orig_sport || dport != orig_dport) {
fl4->fl4_dport = dport;
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index d9076a7a430c..6506221c5fe3 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
u32 portid, const struct nlmsghdr *nlh);
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
- struct netlink_ext_ack *exterr);
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+ struct netlink_ext_ack *exterr);
struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e92f73bb3198..f232512505f8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -599,6 +599,7 @@ get_default_qdisc_ops(const struct net_device *dev, int ntx)
struct Qdisc_class_common {
u32 classid;
+ unsigned int filter_cnt;
struct hlist_node hnode;
};
@@ -633,6 +634,31 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
return NULL;
}
+static inline bool qdisc_class_in_use(const struct Qdisc_class_common *cl)
+{
+ return cl->filter_cnt > 0;
+}
+
+static inline void qdisc_class_get(struct Qdisc_class_common *cl)
+{
+ unsigned int res;
+
+ if (check_add_overflow(cl->filter_cnt, 1, &res))
+ WARN(1, "Qdisc class overflow");
+
+ cl->filter_cnt = res;
+}
+
+static inline void qdisc_class_put(struct Qdisc_class_common *cl)
+{
+ unsigned int res;
+
+ if (check_sub_overflow(cl->filter_cnt, 1, &res))
+ WARN(1, "Qdisc class underflow");
+
+ cl->filter_cnt = res;
+}
+
static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
{
u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY;
@@ -703,7 +729,7 @@ int skb_do_redirect(struct sk_buff *);
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
return skb->tc_at_ingress;
#else
return false;
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 2a67100b2a17..a2310fa995f6 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -148,8 +148,6 @@ void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
void sctp_icmp_proto_unreachable(struct sock *sk,
struct sctp_association *asoc,
struct sctp_transport *t);
-void sctp_backlog_migrate(struct sctp_association *assoc,
- struct sock *oldsk, struct sock *newsk);
int sctp_transport_hashtable_init(void);
void sctp_transport_hashtable_destroy(void);
int sctp_hash_transport(struct sctp_transport *t);
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index f37c7a558d6d..64c42bd56bb2 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -156,7 +156,6 @@ sctp_state_fn_t sctp_sf_do_6_2_sack;
sctp_state_fn_t sctp_sf_autoclose_timer_expire;
/* Prototypes for utility support functions. */
-__u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
const struct sctp_sm_table_entry *sctp_sm_lookup_event(
struct net *net,
enum sctp_event_type event_type,
@@ -166,8 +165,6 @@ int sctp_chunk_iif(const struct sctp_chunk *);
struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *,
struct sctp_chunk *,
gfp_t gfp);
-__u32 sctp_generate_verification_tag(void);
-void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag);
/* Prototypes for chunk-building functions. */
struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5c72d1864dd6..5a24d6d8522a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1122,8 +1122,6 @@ void sctp_outq_free(struct sctp_outq*);
void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
int sctp_outq_is_empty(const struct sctp_outq *);
-void sctp_outq_restart(struct sctp_outq *);
-
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
enum sctp_retransmit_reason reason);
void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2eb916d1ff64..11d503417591 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1323,6 +1323,7 @@ struct proto {
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
+ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
* All the __sk_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
@@ -1339,6 +1340,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
+ unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
@@ -1420,6 +1422,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
return sk->sk_prot->memory_pressure != NULL;
}
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure &&
+ !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
static inline bool sk_under_memory_pressure(const struct sock *sk)
{
if (!sk->sk_prot->memory_pressure)
@@ -1429,7 +1437,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return !!*sk->sk_prot->memory_pressure;
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
static inline long
@@ -1506,7 +1514,7 @@ proto_memory_pressure(struct proto *prot)
{
if (!prot->memory_pressure)
return false;
- return !!*prot->memory_pressure;
+ return !!READ_ONCE(*prot->memory_pressure);
}
@@ -2814,20 +2822,23 @@ sk_is_refcounted(struct sock *sk)
* skb_steal_sock - steal a socket from an sk_buff
* @skb: sk_buff to steal the socket from
* @refcounted: is set to true if the socket is reference-counted
+ * @prefetched: is set to true if the socket was assigned from bpf
*/
static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted)
+skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
{
if (skb->sk) {
struct sock *sk = skb->sk;
*refcounted = true;
- if (skb_sk_is_prefetched(skb))
+ *prefetched = skb_sk_is_prefetched(skb);
+ if (*prefetched)
*refcounted = sk_is_refcounted(sk);
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
+ *prefetched = false;
*refcounted = false;
return NULL;
}
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index ca0312b78294..a43062d4c734 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -201,8 +201,6 @@ struct switchdev_obj_in_state_mrp {
#define SWITCHDEV_OBJ_IN_STATE_MRP(OBJ) \
container_of((OBJ), struct switchdev_obj_in_state_mrp, obj)
-typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
-
struct switchdev_brport {
struct net_device *dev;
const void *ctx;
@@ -231,6 +229,7 @@ enum switchdev_notifier_type {
SWITCHDEV_BRPORT_OFFLOADED,
SWITCHDEV_BRPORT_UNOFFLOADED,
+ SWITCHDEV_BRPORT_REPLAY,
};
struct switchdev_notifier_info {
@@ -299,6 +298,11 @@ void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb);
+int switchdev_bridge_port_replay(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack);
void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
@@ -322,10 +326,6 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack);
-void switchdev_port_fwd_mark_set(struct net_device *dev,
- struct net_device *group_dev,
- bool joining);
-
int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
const struct switchdev_notifier_fdb_info *fdb_info,
bool (*check_cb)(const struct net_device *dev),
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3a818fe1a8a5..91688d0dadcd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -322,7 +322,6 @@ int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);
void tcp_remove_empty_skb(struct sock *sk);
-int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
@@ -349,7 +348,6 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule);
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
@@ -605,7 +603,6 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
unsigned int mss_now, gfp_t gfp);
void tcp_send_probe0(struct sock *);
-void tcp_send_partial(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority);
@@ -623,7 +620,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk, struct sk_buff *skb);
-void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
void tcp_check_space(struct sock *sk);
void tcp_sack_compress_send_ack(struct sock *sk);
@@ -1431,13 +1427,39 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
__u32 *window_clamp, int wscale_ok,
__u8 *rcv_wscale, __u32 init_rcv_wnd);
+static inline int __tcp_win_from_space(u8 scaling_ratio, int space)
+{
+ s64 scaled_space = (s64)space * scaling_ratio;
+
+ return scaled_space >> TCP_RMEM_TO_WIN_SCALE;
+}
+
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+ return __tcp_win_from_space(tcp_sk(sk)->scaling_ratio, space);
+}
+
+/* inverse of __tcp_win_from_space() */
+static inline int __tcp_space_from_win(u8 scaling_ratio, int win)
+{
+ u64 val = (u64)win << TCP_RMEM_TO_WIN_SCALE;
- return tcp_adv_win_scale <= 0 ?
- (space>>(-tcp_adv_win_scale)) :
- space - (space>>tcp_adv_win_scale);
+ do_div(val, scaling_ratio);
+ return val;
+}
+
+static inline int tcp_space_from_win(const struct sock *sk, int win)
+{
+ return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win);
+}
+
+static inline void tcp_scaling_ratio_init(struct sock *sk)
+{
+ /* Assume a conservative default of 1200 bytes of payload per 4K page.
+ * This may be adjusted later in tcp_measure_rcv_mss().
+ */
+ tcp_sk(sk)->scaling_ratio = (1200 << TCP_RMEM_TO_WIN_SCALE) /
+ SKB_TRUESIZE(4096);
}
/* Note: caller must be prepared to deal with negative returns */
@@ -2008,7 +2030,7 @@ static inline bool inet_sk_transparent(const struct sock *sk)
case TCP_NEW_SYN_RECV:
return inet_rsk(inet_reqsk(sk))->no_srccheck;
}
- return inet_sk(sk)->transparent;
+ return inet_test_bit(TRANSPARENT, sk);
}
/* Determines whether this is a thin stream (which may suffer from
@@ -2335,7 +2357,6 @@ struct sk_msg;
struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/include/net/tcx.h b/include/net/tcx.h
new file mode 100644
index 000000000000..264f147953ba
--- /dev/null
+++ b/include/net/tcx.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef __NET_TCX_H
+#define __NET_TCX_H
+
+#include <linux/bpf.h>
+#include <linux/bpf_mprog.h>
+
+#include <net/sch_generic.h>
+
+struct mini_Qdisc;
+
+struct tcx_entry {
+ struct mini_Qdisc __rcu *miniq;
+ struct bpf_mprog_bundle bundle;
+ bool miniq_active;
+ struct rcu_head rcu;
+};
+
+struct tcx_link {
+ struct bpf_link link;
+ struct net_device *dev;
+ u32 location;
+};
+
+static inline void tcx_set_ingress(struct sk_buff *skb, bool ingress)
+{
+#ifdef CONFIG_NET_XGRESS
+ skb->tc_at_ingress = ingress;
+#endif
+}
+
+#ifdef CONFIG_NET_XGRESS
+static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
+{
+ struct bpf_mprog_bundle *bundle = entry->parent;
+
+ return container_of(bundle, struct tcx_entry, bundle);
+}
+
+static inline struct tcx_link *tcx_link(struct bpf_link *link)
+{
+ return container_of(link, struct tcx_link, link);
+}
+
+static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link)
+{
+ return tcx_link((struct bpf_link *)link);
+}
+
+void tcx_inc(void);
+void tcx_dec(void);
+
+static inline void tcx_entry_sync(void)
+{
+ /* bpf_mprog_entry got a/b swapped, therefore ensure that
+ * there are no inflight users on the old one anymore.
+ */
+ synchronize_rcu();
+}
+
+static inline void
+tcx_entry_update(struct net_device *dev, struct bpf_mprog_entry *entry,
+ bool ingress)
+{
+ ASSERT_RTNL();
+ if (ingress)
+ rcu_assign_pointer(dev->tcx_ingress, entry);
+ else
+ rcu_assign_pointer(dev->tcx_egress, entry);
+}
+
+static inline struct bpf_mprog_entry *
+tcx_entry_fetch(struct net_device *dev, bool ingress)
+{
+ ASSERT_RTNL();
+ if (ingress)
+ return rcu_dereference_rtnl(dev->tcx_ingress);
+ else
+ return rcu_dereference_rtnl(dev->tcx_egress);
+}
+
+static inline struct bpf_mprog_entry *tcx_entry_create(void)
+{
+ struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL);
+
+ if (tcx) {
+ bpf_mprog_bundle_init(&tcx->bundle);
+ return &tcx->bundle.a;
+ }
+ return NULL;
+}
+
+static inline void tcx_entry_free(struct bpf_mprog_entry *entry)
+{
+ kfree_rcu(tcx_entry(entry), rcu);
+}
+
+static inline struct bpf_mprog_entry *
+tcx_entry_fetch_or_create(struct net_device *dev, bool ingress, bool *created)
+{
+ struct bpf_mprog_entry *entry = tcx_entry_fetch(dev, ingress);
+
+ *created = false;
+ if (!entry) {
+ entry = tcx_entry_create();
+ if (!entry)
+ return NULL;
+ *created = true;
+ }
+ return entry;
+}
+
+static inline void tcx_skeys_inc(bool ingress)
+{
+ tcx_inc();
+ if (ingress)
+ net_inc_ingress_queue();
+ else
+ net_inc_egress_queue();
+}
+
+static inline void tcx_skeys_dec(bool ingress)
+{
+ if (ingress)
+ net_dec_ingress_queue();
+ else
+ net_dec_egress_queue();
+ tcx_dec();
+}
+
+static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
+ const bool active)
+{
+ ASSERT_RTNL();
+ tcx_entry(entry)->miniq_active = active;
+}
+
+static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
+{
+ ASSERT_RTNL();
+ return bpf_mprog_total(entry) || tcx_entry(entry)->miniq_active;
+}
+
+static inline enum tcx_action_base tcx_action_code(struct sk_buff *skb,
+ int code)
+{
+ switch (code) {
+ case TCX_PASS:
+ skb->tc_index = qdisc_skb_cb(skb)->tc_classid;
+ fallthrough;
+ case TCX_DROP:
+ case TCX_REDIRECT:
+ return code;
+ case TCX_NEXT:
+ default:
+ return TCX_NEXT;
+ }
+}
+#endif /* CONFIG_NET_XGRESS */
+
+#if defined(CONFIG_NET_XGRESS) && defined(CONFIG_BPF_SYSCALL)
+int tcx_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int tcx_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
+void tcx_uninstall(struct net_device *dev, bool ingress);
+
+int tcx_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
+static inline void dev_tcx_uninstall(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ tcx_uninstall(dev, true);
+ tcx_uninstall(dev, false);
+}
+#else
+static inline int tcx_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_link_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_prog_detach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int tcx_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
+
+static inline void dev_tcx_uninstall(struct net_device *dev)
+{
+}
+#endif /* CONFIG_NET_XGRESS && CONFIG_BPF_SYSCALL */
+#endif /* __NET_TCX_H */
diff --git a/include/net/tls.h b/include/net/tls.h
index 5e71dd3df8ca..a2b44578dcb7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -51,16 +51,6 @@
struct tls_rec;
-struct tls_cipher_size_desc {
- unsigned int iv;
- unsigned int key;
- unsigned int salt;
- unsigned int tag;
- unsigned int rec_seq;
-};
-
-extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
-
/* Maximum data size carried in a TLS record */
#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14)
@@ -69,10 +59,6 @@ extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type)
-#define TLS_RECORD_TYPE_ALERT 0x15
-#define TLS_RECORD_TYPE_HANDSHAKE 0x16
-#define TLS_RECORD_TYPE_DATA 0x17
-
#define TLS_AAD_SPACE_SIZE 13
#define MAX_IV_SIZE 16
diff --git a/include/net/tls_prot.h b/include/net/tls_prot.h
new file mode 100644
index 000000000000..68a40756440b
--- /dev/null
+++ b/include/net/tls_prot.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ *
+ * TLS Protocol definitions
+ *
+ * From https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml
+ */
+
+#ifndef _TLS_PROT_H
+#define _TLS_PROT_H
+
+/*
+ * TLS Record protocol: ContentType
+ */
+enum {
+ TLS_RECORD_TYPE_CHANGE_CIPHER_SPEC = 20,
+ TLS_RECORD_TYPE_ALERT = 21,
+ TLS_RECORD_TYPE_HANDSHAKE = 22,
+ TLS_RECORD_TYPE_DATA = 23,
+ TLS_RECORD_TYPE_HEARTBEAT = 24,
+ TLS_RECORD_TYPE_TLS12_CID = 25,
+ TLS_RECORD_TYPE_ACK = 26,
+};
+
+/*
+ * TLS Alert protocol: AlertLevel
+ */
+enum {
+ TLS_ALERT_LEVEL_WARNING = 1,
+ TLS_ALERT_LEVEL_FATAL = 2,
+};
+
+/*
+ * TLS Alert protocol: AlertDescription
+ */
+enum {
+ TLS_ALERT_DESC_CLOSE_NOTIFY = 0,
+ TLS_ALERT_DESC_UNEXPECTED_MESSAGE = 10,
+ TLS_ALERT_DESC_BAD_RECORD_MAC = 20,
+ TLS_ALERT_DESC_RECORD_OVERFLOW = 22,
+ TLS_ALERT_DESC_HANDSHAKE_FAILURE = 40,
+ TLS_ALERT_DESC_BAD_CERTIFICATE = 42,
+ TLS_ALERT_DESC_UNSUPPORTED_CERTIFICATE = 43,
+ TLS_ALERT_DESC_CERTIFICATE_REVOKED = 44,
+ TLS_ALERT_DESC_CERTIFICATE_EXPIRED = 45,
+ TLS_ALERT_DESC_CERTIFICATE_UNKNOWN = 46,
+ TLS_ALERT_DESC_ILLEGAL_PARAMETER = 47,
+ TLS_ALERT_DESC_UNKNOWN_CA = 48,
+ TLS_ALERT_DESC_ACCESS_DENIED = 49,
+ TLS_ALERT_DESC_DECODE_ERROR = 50,
+ TLS_ALERT_DESC_DECRYPT_ERROR = 51,
+ TLS_ALERT_DESC_TOO_MANY_CIDS_REQUESTED = 52,
+ TLS_ALERT_DESC_PROTOCOL_VERSION = 70,
+ TLS_ALERT_DESC_INSUFFICIENT_SECURITY = 71,
+ TLS_ALERT_DESC_INTERNAL_ERROR = 80,
+ TLS_ALERT_DESC_INAPPROPRIATE_FALLBACK = 86,
+ TLS_ALERT_DESC_USER_CANCELED = 90,
+ TLS_ALERT_DESC_MISSING_EXTENSION = 109,
+ TLS_ALERT_DESC_UNSUPPORTED_EXTENSION = 110,
+ TLS_ALERT_DESC_UNRECOGNIZED_NAME = 112,
+ TLS_ALERT_DESC_BAD_CERTIFICATE_STATUS_RESPONSE = 113,
+ TLS_ALERT_DESC_UNKNOWN_PSK_IDENTITY = 115,
+ TLS_ALERT_DESC_CERTIFICATE_REQUIRED = 116,
+ TLS_ALERT_DESC_NO_APPLICATION_PROTOCOL = 120,
+};
+
+#endif /* _TLS_PROT_H */
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index d27b1caf3753..1a97e3f32029 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -33,8 +33,6 @@ void udplitev6_exit(void);
int tcpv6_init(void);
void tcpv6_exit(void);
-int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
-
/* this does all the common and the specific ctl work */
void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
struct sk_buff *skb);
diff --git a/include/net/udp.h b/include/net/udp.h
index 4d13424f8f72..488a6d2babcc 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -273,9 +273,6 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
int udp_v4_early_demux(struct sk_buff *skb);
bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
-int udp_get_port(struct sock *sk, unsigned short snum,
- int (*saddr_cmp)(const struct sock *,
- const struct sock *));
int udp_err(struct sk_buff *, u32);
int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
@@ -529,7 +526,6 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
-struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
#endif
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 299c14ce2bb9..bd33ff2b8f42 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -81,6 +81,4 @@ static inline __wsum udplite_csum(struct sk_buff *skb)
}
void udplite4_register(void);
-int udplite_get_port(struct sock *sk, unsigned short snum,
- int (*scmp)(const struct sock *, const struct sock *));
#endif /* _UDPLITE_H */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 1648240c9668..6a9f8a5f387c 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -556,12 +556,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
}
static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
- int hash,
+ u32 hash,
struct vxlan_rdst *rdst)
{
struct fib_nh_common *nhc;
- nhc = nexthop_path_fdb_result(nh, hash);
+ nhc = nexthop_path_fdb_result(nh, hash >> 1);
if (unlikely(!nhc))
return false;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index d1c5381fc95f..de08c8e0d134 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -6,9 +6,10 @@
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__
-#include <linux/skbuff.h> /* skb_shared_info */
-#include <uapi/linux/netdev.h>
#include <linux/bitfield.h>
+#include <linux/filter.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h> /* skb_shared_info */
/**
* DOC: XDP RX-queue information
@@ -45,8 +46,6 @@ enum xdp_mem_type {
MEM_TYPE_MAX,
};
-typedef u32 xdp_features_t;
-
/* XDP flags for ndo_xdp_xmit */
#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH
@@ -443,6 +442,12 @@ enum xdp_rss_hash_type {
XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR,
};
+struct xdp_metadata_ops {
+ int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
+ int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type);
+};
+
#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
@@ -474,4 +479,20 @@ static inline void xdp_clear_features_flag(struct net_device *dev)
xdp_set_features_flag(dev, 0);
}
+static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
+ struct xdp_buff *xdp)
+{
+ /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
+ * under local_bh_disable(), which provides the needed RCU protection
+ * for accessing map entries.
+ */
+ u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+ if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+ if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+ act = xdp_master_redirect(xdp);
+ }
+
+ return act;
+}
#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e96a1151ec75..1617af380162 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -52,6 +52,7 @@ struct xdp_sock {
struct xsk_buff_pool *pool;
u16 queue_id;
bool zc;
+ bool sg;
enum {
XSK_READY = 0,
XSK_BOUND,
@@ -67,6 +68,12 @@ struct xdp_sock {
u64 rx_dropped;
u64 rx_queue_full;
+ /* When __xsk_generic_xmit() must return before it sees the EOP descriptor for the current
+ * packet, the partially built skb is saved here so that packet building can resume in next
+ * call of __xsk_generic_xmit().
+ */
+ struct sk_buff *skb;
+
struct list_head map_list;
/* Protects map_list */
spinlock_t map_list_lock;
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index c243f906ebed..1f6fc8c7a84c 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -89,6 +89,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return xp_alloc(pool);
}
+static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+{
+ return !xp_mb_desc(desc);
+}
+
/* Returns as many entries as possible up to max. 0 <= N <= max. */
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
@@ -103,10 +108,45 @@ static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count)
static inline void xsk_buff_free(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ struct list_head *xskb_list = &xskb->pool->xskb_list;
+ struct xdp_buff_xsk *pos, *tmp;
+
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+ list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_del(&pos->xskb_list_node);
+ xp_free(pos);
+ }
+
+ xdp_get_shared_info_from_buff(xdp)->nr_frags = 0;
+out:
xp_free(xskb);
}
+static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+{
+ struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+ list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
+}
+
+static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+{
+ struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+ struct xdp_buff *ret = NULL;
+ struct xdp_buff_xsk *frag;
+
+ frag = list_first_entry_or_null(&xskb->pool->xskb_list,
+ struct xdp_buff_xsk, xskb_list_node);
+ if (frag) {
+ list_del(&frag->xskb_list_node);
+ ret = &frag->xdp;
+ }
+
+ return ret;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
@@ -241,6 +281,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool)
return NULL;
}
+static inline bool xsk_is_eop_desc(struct xdp_desc *desc)
+{
+ return false;
+}
+
static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
return 0;
@@ -255,6 +300,15 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
{
}
+static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
+{
+ return NULL;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 151ca95dd08d..363c7d510554 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1984,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
if (dev->xfrmdev_ops->xdo_dev_state_free)
dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
netdev_put(dev, &xso->dev_tracker);
}
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index a8d7b8a3688a..b0bdff26fc88 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -29,6 +29,7 @@ struct xdp_buff_xsk {
struct xsk_buff_pool *pool;
u64 orig_addr;
struct list_head free_list_node;
+ struct list_head xskb_list_node;
};
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
@@ -54,6 +55,7 @@ struct xsk_buff_pool {
struct xdp_umem *umem;
struct work_struct work;
struct list_head free_list;
+ struct list_head xskb_list;
u32 heads_cnt;
u16 queue_id;
@@ -184,6 +186,11 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
!(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK);
}
+static inline bool xp_mb_desc(struct xdp_desc *desc)
+{
+ return desc->options & XDP_PKT_CONTD;
+}
+
static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
{
return addr & pool->chunk_mask;