diff options
Diffstat (limited to 'include/net')
40 files changed, 410 insertions, 192 deletions
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 9e85424c8343..70302c92d329 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -242,8 +242,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init vsock_bpf_build_proto(void); #else diff --git a/include/net/ax25.h b/include/net/ax25.h index cb622d84cd0c..4ee141aae0a2 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -231,6 +231,7 @@ typedef struct ax25_dev { #endif refcount_t refcount; bool device_up; + struct rcu_head rcu; } ax25_dev; typedef struct ax25_cb { @@ -290,9 +291,8 @@ static inline void ax25_dev_hold(ax25_dev *ax25_dev) static inline void ax25_dev_put(ax25_dev *ax25_dev) { - if (refcount_dec_and_test(&ax25_dev->refcount)) { - kfree(ax25_dev); - } + if (refcount_dec_and_test(&ax25_dev->refcount)) + kfree_rcu(ax25_dev, rcu); } static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { @@ -335,9 +335,9 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *); extern spinlock_t ax25_dev_lock; #if IS_ENABLED(CONFIG_AX25) -static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev) +static inline ax25_dev *ax25_dev_ax25dev(const struct net_device *dev) { - return dev->ax25_ptr; + return rcu_dereference_rtnl(dev->ax25_ptr); } #endif diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5bb4eaa52e14..4b3200542fe6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -353,6 +353,22 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, + + /* When this quirk is set, the HCI_OP_READ_VOICE_SETTING command is + * skipped. This is required for a subset of the CSR controller clones + * which erroneously claim to support it. + * + * This quirk must be set before hci_register_dev is called. + */ + HCI_QUIRK_BROKEN_READ_VOICE_SETTING, + + /* When this quirk is set, the HCI_OP_READ_PAGE_SCAN_TYPE command is + * skipped. This is required for a subset of the CSR controller clones + * which erroneously claim to support it. + * + * This quirk must be set before hci_register_dev is called. + */ + HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, }; /* HCI device flags */ @@ -683,7 +699,7 @@ enum { #define HCI_ERROR_REMOTE_POWER_OFF 0x15 #define HCI_ERROR_LOCAL_HOST_TERM 0x16 #define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18 -#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e +#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1a #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c @@ -1897,6 +1913,8 @@ struct hci_cp_le_pa_create_sync { __u8 sync_cte_type; } __packed; +#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045 + #define HCI_OP_LE_PA_TERM_SYNC 0x2046 struct hci_cp_le_pa_term_sync { __le16 handle; @@ -2594,6 +2612,7 @@ struct hci_ev_le_conn_complete { #define LE_EXT_ADV_DIRECT_IND 0x0004 #define LE_EXT_ADV_SCAN_RSP 0x0008 #define LE_EXT_ADV_LEGACY_PDU 0x0010 +#define LE_EXT_ADV_DATA_STATUS_MASK 0x0060 #define LE_EXT_ADV_EVT_TYPE_MASK 0x007f #define ADDR_LE_DEV_PUBLIC 0x00 @@ -2796,7 +2815,7 @@ struct hci_evt_le_create_big_complete { __le16 bis_handle[]; } __packed; -#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d +#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d struct hci_evt_le_big_sync_estabilished { __u8 status; __u8 handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ba7b52584770..df4af45f8603 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -29,6 +29,8 @@ #include <linux/idr.h> #include <linux/leds.h> #include <linux/rculist.h> +#include <linux/spinlock.h> +#include <linux/srcu.h> #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_sync.h> @@ -92,6 +94,7 @@ struct discovery_state { u16 uuid_count; u8 (*uuids)[16]; unsigned long name_resolve_timeout; + spinlock_t lock; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ @@ -338,6 +341,7 @@ struct adv_monitor { struct hci_dev { struct list_head list; + struct srcu_struct srcu; struct mutex lock; struct ida unset_handle_ida; @@ -538,6 +542,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; struct list_head mesh_pending; + struct mutex mgmt_pending_lock; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; @@ -804,6 +809,7 @@ struct hci_conn_params { extern struct list_head hci_dev_list; extern struct list_head hci_cb_list; extern rwlock_t hci_dev_list_lock; +extern struct mutex hci_cb_list_lock; #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) @@ -813,20 +819,20 @@ extern rwlock_t hci_dev_list_lock; #define hci_dev_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), (hdev)->dev_flags) #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags) -#define hci_dev_clear_volatile_flags(hdev) \ - do { \ - hci_dev_clear_flag(hdev, HCI_LE_SCAN); \ - hci_dev_clear_flag(hdev, HCI_LE_ADV); \ - hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\ - hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); \ - hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ +#define hci_dev_clear_volatile_flags(hdev) \ + do { \ + hci_dev_clear_flag((hdev), HCI_LE_SCAN); \ + hci_dev_clear_flag((hdev), HCI_LE_ADV); \ + hci_dev_clear_flag((hdev), HCI_LL_RPA_RESOLUTION); \ + hci_dev_clear_flag((hdev), HCI_PERIODIC_INQ); \ + hci_dev_clear_flag((hdev), HCI_QUALITY_REPORT); \ } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ - (hdev->le_states[4] & 0x08) && /* Central */ \ - (hdev->le_states[4] & 0x40) && /* Peripheral */ \ - (hdev->le_states[3] & 0x10)) /* Simultaneous */ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &(hdev)->quirks) && \ + ((hdev)->le_states[4] & 0x08) && /* Central */ \ + ((hdev)->le_states[4] & 0x40) && /* Peripheral */ \ + ((hdev)->le_states[3] & 0x10)) /* Simultaneous */ /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); @@ -869,6 +875,7 @@ static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, static inline void discovery_init(struct hci_dev *hdev) { + spin_lock_init(&hdev->discovery.lock); hdev->discovery.state = DISCOVERY_STOPPED; INIT_LIST_HEAD(&hdev->discovery.all); INIT_LIST_HEAD(&hdev->discovery.unknown); @@ -883,8 +890,11 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.report_invalid_rssi = true; hdev->discovery.rssi = HCI_RSSI_INVALID; hdev->discovery.uuid_count = 0; + + spin_lock(&hdev->discovery.lock); kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; + spin_unlock(&hdev->discovery.lock); } bool hci_discovery_active(struct hci_dev *hdev); @@ -1104,10 +1114,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, - __u8 sid, - bdaddr_t *dst, - __u8 dst_type) +static inline struct hci_conn * +hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1115,8 +1123,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || bacmp(&c->dst, dst) || - c->dst_type != dst_type || c->sid != sid) + if (c->type != ISO_LINK) + continue; + + if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) continue; rcu_read_unlock(); @@ -1505,8 +1515,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); -int hci_pa_create_sync_pending(struct hci_dev *hdev); -int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, @@ -1547,9 +1555,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]); +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, @@ -1920,6 +1928,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->commands[20] & 0x10 && \ !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) +#define read_voice_setting_capable(dev) \ + ((dev)->commands[9] & 0x04 && \ + !test_bit(HCI_QUIRK_BROKEN_READ_VOICE_SETTING, &(dev)->quirks)) + /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ @@ -2006,47 +2018,24 @@ struct hci_cb { char *name; - bool (*match) (struct hci_conn *conn); void (*connect_cfm) (struct hci_conn *conn, __u8 status); void (*disconn_cfm) (struct hci_conn *conn, __u8 status); void (*security_cfm) (struct hci_conn *conn, __u8 status, - __u8 encrypt); + __u8 encrypt); void (*key_change_cfm) (struct hci_conn *conn, __u8 status); void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); }; -static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list) -{ - struct hci_cb *cb, *cpy; - - rcu_read_lock(); - list_for_each_entry_rcu(cb, &hci_cb_list, list) { - if (cb->match && cb->match(conn)) { - cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC); - if (!cpy) - break; - - *cpy = *cb; - INIT_LIST_HEAD(&cpy->list); - list_add_rcu(&cpy->list, list); - } - } - rcu_read_unlock(); -} - static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; + struct hci_cb *cb; - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->connect_cfm) cb->connect_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->connect_cfm_cb) conn->connect_cfm_cb(conn, status); @@ -2054,43 +2043,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->disconn_cfm) cb->disconn_cfm(conn, reason); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); if (conn->disconn_cfm_cb) conn->disconn_cfm_cb(conn, reason); } -static inline void hci_security_cfm(struct hci_conn *conn, __u8 status, - __u8 encrypt) -{ - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); - - list_for_each_entry_safe(cb, tmp, &list, list) { - if (cb->security_cfm) - cb->security_cfm(conn, status, encrypt); - kfree(cb); - } - - if (conn->security_cfm_cb) - conn->security_cfm_cb(conn, status); -} - static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) @@ -2098,11 +2066,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { + struct hci_cb *cb; __u8 encrypt; if (conn->state == BT_CONFIG) { @@ -2129,38 +2106,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) conn->sec_level = conn->pending_sec_level; } - hci_security_cfm(conn, status, encrypt); + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->security_cfm) + cb->security_cfm(conn, status, encrypt); + } + mutex_unlock(&hci_cb_list_lock); + + if (conn->security_cfm_cb) + conn->security_cfm_cb(conn, status); } static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->key_change_cfm) cb->key_change_cfm(conn, status); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, __u8 role) { - struct list_head list; - struct hci_cb *cb, *tmp; - - INIT_LIST_HEAD(&list); - hci_cb_lookup(conn, &list); + struct hci_cb *cb; - list_for_each_entry_safe(cb, tmp, &list, list) { + mutex_lock(&hci_cb_list_lock); + list_for_each_entry(cb, &hci_cb_list, list) { if (cb->role_switch_cfm) cb->role_switch_cfm(conn, status, role); - kfree(cb); } + mutex_unlock(&hci_cb_list_lock); } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) @@ -2409,7 +2388,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index f3052cb252ef..dbabc17b30cd 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -186,3 +186,6 @@ int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params); + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d9c767cf773d..9189354c568f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -668,7 +668,7 @@ struct l2cap_conn { struct l2cap_chan *smp; struct list_head chan_l; - struct mutex chan_lock; + struct mutex lock; struct kref ref; struct list_head users; }; @@ -970,6 +970,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err); void l2cap_send_conn_req(struct l2cap_chan *chan); struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn); +struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 941dc62f3027..c555d9964702 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -127,6 +127,8 @@ struct wiphy; * even if it is otherwise disabled. * @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation * with very low power (VLP), even if otherwise set to NO_IR. + * @IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY: Allow activity on a 20 MHz channel, + * even if otherwise set to NO_IR. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -155,6 +157,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), IEEE80211_CHAN_CAN_MONITOR = BIT(24), IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25), + IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY = BIT(26), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -630,7 +633,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, const struct ieee80211_sband_iftype_data *data; int i; - if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) + if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) return NULL; if (iftype == NL80211_IFTYPE_AP_VLAN) @@ -2707,7 +2710,7 @@ struct cfg80211_scan_request { s8 tsf_report_link_id; /* keep last */ - struct ieee80211_channel *channels[] __counted_by(n_channels); + struct ieee80211_channel *channels[]; }; static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) diff --git a/include/net/checksum.h b/include/net/checksum.h index 1338cb92c8e7..28b101f26636 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -158,7 +158,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, - __wsum diff, bool pseudohdr); + __wsum diff, bool pseudohdr, bool ipv6); static __always_inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, diff --git a/include/net/dst.h b/include/net/dst.h index 0f303cc60252..e18826cd0559 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -440,6 +440,15 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) dst->expires = expires; } +static inline unsigned int dst_dev_overhead(struct dst_entry *dst, + struct sk_buff *skb) +{ + if (likely(dst)) + return LL_RESERVED_SPACE(dst->dev); + + return skb->mac_len; +} + INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, @@ -447,7 +456,7 @@ INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, /* Output packet to network from transport. */ static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return INDIRECT_CALL_INET(skb_dst(skb)->output, + return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->output), ip6_output, ip_output, net, sk, skb); } @@ -457,7 +466,7 @@ INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *)); /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { - return INDIRECT_CALL_INET(skb_dst(skb)->input, + return INDIRECT_CALL_INET(READ_ONCE(skb_dst(skb)->input), ip6_input, ip_local_deliver, skb); } diff --git a/include/net/gro.h b/include/net/gro.h index b9b58c1f8d19..7b548f91754b 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -11,6 +11,9 @@ #include <net/udp.h> #include <net/hotdata.h> +/* This should be increased if a protocol with a bigger head is added. */ +#define GRO_MAX_HEAD (MAX_HEADER + 128) + struct napi_gro_cb { union { struct { diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 74ff688568a0..f475757daafb 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -96,30 +96,28 @@ static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr) /* can be called with or without local BH being disabled */ struct inet_peer *inet_getpeer(struct inet_peer_base *base, - const struct inetpeer_addr *daddr, - int create); + const struct inetpeer_addr *daddr); static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, - int vif, int create) + int vif) { struct inetpeer_addr daddr; daddr.a4.addr = v4daddr; daddr.a4.vif = vif; daddr.family = AF_INET; - return inet_getpeer(base, &daddr, create); + return inet_getpeer(base, &daddr); } static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, - const struct in6_addr *v6daddr, - int create) + const struct in6_addr *v6daddr) { struct inetpeer_addr daddr; daddr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(base, &daddr, create); + return inet_getpeer(base, &daddr); } static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a, diff --git a/include/net/ip.h b/include/net/ip.h index d92d3bc3ec0e..bd201278c55a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -424,6 +424,11 @@ int ip_decrease_ttl(struct iphdr *iph) return --iph->ttl; } +static inline dscp_t ip4h_dscp(const struct iphdr *ip4h) +{ + return inet_dsfield_to_dscp(ip4h->tos); +} + static inline int ip_mtu_locked(const struct dst_entry *dst) { const struct rtable *rt = dst_rtable(dst); @@ -465,9 +470,12 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = dst_rtable(dst); - struct net *net = dev_net(dst->dev); - unsigned int mtu; + unsigned int mtu, res; + struct net *net; + rcu_read_lock(); + + net = dev_net_rcu(dst->dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -491,7 +499,11 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, out: mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + res = mtu - lwtunnel_headroom(dst->lwtstate, mtu); + + rcu_read_unlock(); + + return res; } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 248bfb26e2af..6d52b5584d2f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -363,15 +363,6 @@ struct ipcm6_cookie { struct ipv6_txoptions *opt; }; -static inline void ipcm6_init(struct ipcm6_cookie *ipc6) -{ - *ipc6 = (struct ipcm6_cookie) { - .hlimit = -1, - .tclass = -1, - .dontfrag = -1, - }; -} - static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, const struct sock *sk) { diff --git a/include/net/kcm.h b/include/net/kcm.h index 441e993be634..d9c35e71ecea 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -71,7 +71,6 @@ struct kcm_sock { struct list_head wait_psock_list; struct sk_buff *seq_skb; struct mutex tx_mutex; - u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 031c661aa14d..bdfa9d414360 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -198,10 +198,12 @@ struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(dev)) { struct net_device *master; + rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); + rcu_read_unlock(); } return skb; diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 53bd2d02a4f0..09791f5d9b6e 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -138,12 +138,12 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, static inline void lwtunnel_set_redirect(struct dst_entry *dst) { if (lwtunnel_output_redirect(dst->lwtstate)) { - dst->lwtstate->orig_output = dst->output; - dst->output = lwtunnel_output; + dst->lwtstate->orig_output = READ_ONCE(dst->output); + WRITE_ONCE(dst->output, lwtunnel_output); } if (lwtunnel_input_redirect(dst->lwtstate)) { - dst->lwtstate->orig_input = dst->input; - dst->input = lwtunnel_input; + dst->lwtstate->orig_input = READ_ONCE(dst->input); + WRITE_ONCE(dst->input, lwtunnel_input); } } #else diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5b712582f9a9..80259a37e724 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018 - 2025 Intel Corporation */ #ifndef MAC80211_H @@ -2826,6 +2826,11 @@ struct ieee80211_txq { * implements MLO, so operation can continue on other links when one * link is switching. * + * @IEEE80211_HW_STRICT: strictly enforce certain things mandated by the spec + * but otherwise ignored/worked around for interoperability. This is a + * HW flag so drivers can opt in according to their own control, e.g. in + * testing. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2885,6 +2890,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DISALLOW_PUNCTURING, IEEE80211_HW_DISALLOW_PUNCTURING_5GHZ, IEEE80211_HW_HANDLES_QUIET_CSA, + IEEE80211_HW_STRICT, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3797,7 +3803,7 @@ enum ieee80211_reconfig_type { * @was_assoc: set if this call is due to deauth/disassoc * while just having been associated * @link_id: the link id on which the frame will be TX'ed. - * Only used with the mgd_prepare_tx() method. + * 0 for a non-MLO connection. */ struct ieee80211_prep_tx_info { u16 duration; @@ -4263,6 +4269,8 @@ struct ieee80211_prep_tx_info { * @mgd_complete_tx: Notify the driver that the response frame for a previously * transmitted frame announced with @mgd_prepare_tx was received, the data * is filled similarly to @mgd_prepare_tx though the duration is not used. + * Note that this isn't always called for each mgd_prepare_tx() call, for + * example for SAE the 'confirm' messages can be on the air in any order. * * @mgd_protect_tdls_discover: Protect a TDLS discovery session. After sending * a TDLS discovery-request, we expect a reply to arrive on the AP's @@ -4427,6 +4435,8 @@ struct ieee80211_prep_tx_info { * new links bitmaps may be 0 if going from/to a non-MLO situation. * The @old array contains pointers to the old bss_conf structures * that were already removed, in case they're needed. + * Note that removal of link should always succeed, so the return value + * will be ignored in a removal only case. * This callback can sleep. * @change_sta_links: Change the valid links of a station, similar to * @change_vif_links. This callback can sleep. @@ -5306,22 +5316,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, int max_rates); /** - * ieee80211_sta_set_expected_throughput - set the expected tpt for a station - * - * Call this function to notify mac80211 about a change in expected throughput - * to a station. A driver for a device that does rate control in firmware can - * call this function when the expected throughput estimate towards a station - * changes. The information is used to tune the CoDel AQM applied to traffic - * going towards that station (which can otherwise be too aggressive and cause - * slow stations to starve). - * - * @pubsta: the station to set throughput for. - * @thr: the current expected throughput in kbps. - */ -void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, - u32 thr); - -/** * ieee80211_tx_rate_update - transmit rate update callback * * Drivers should call this functions with a non-NULL pub sta diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index de47fa533b15..6a0e83ac0fdb 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -406,8 +406,6 @@ struct gdma_context { struct gdma_dev mana_ib; }; -#define MAX_NUM_GDMA_DEVICES 4 - static inline bool mana_gd_is_mana(struct gdma_dev *gd) { return gd->dev_id.type == GDMA_DEVICE_MANA; @@ -554,11 +552,15 @@ enum { #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) #define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) +/* Driver can handle holes (zeros) in the device list */ +#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ - GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT) + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP) #define GDMA_DRV_CAP_FLAGS2 0 @@ -619,11 +621,12 @@ struct gdma_query_max_resources_resp { }; /* HW DATA */ /* GDMA_LIST_DEVICES */ +#define GDMA_DEV_LIST_SIZE 64 struct gdma_list_devices_resp { struct gdma_resp_hdr hdr; u32 num_of_devs; u32 reserved; - struct gdma_dev_id devs[64]; + struct gdma_dev_id devs[GDMA_DEV_LIST_SIZE]; }; /* HW DATA */ /* GDMA_REGISTER_DEVICE */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a44f262a7384..cb5f835a5d61 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -180,6 +180,7 @@ struct pneigh_entry { netdevice_tracker dev_tracker; u32 flags; u8 protocol; + bool permanent; u32 key[]; }; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 9398c8f49953..022ee2fc627c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -291,6 +291,7 @@ static inline int check_net(const struct net *net) } void net_drop_ns(void *); +void net_passive_dec(struct net *net); #else @@ -320,8 +321,23 @@ static inline int check_net(const struct net *net) } #define net_drop_ns NULL + +static inline void net_passive_dec(struct net *net) +{ + refcount_dec(&net->passive); +} #endif +static inline void net_passive_inc(struct net *net) +{ + refcount_inc(&net->passive); +} + +/* Returns true if the netns initialization is completed successfully */ +static inline bool net_initialized(const struct net *net) +{ + return READ_ONCE(net->list.next); +} static inline void __netns_tracker_alloc(struct net *net, netns_tracker *tracker, @@ -387,7 +403,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } -static inline struct net *read_pnet_rcu(possible_net_t *pnet) +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 5ca019d294ca..173bcfcd868a 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -92,6 +92,12 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum); + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cba3ccf03fcc..8cb70e7485e2 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -308,8 +308,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct) /* use after obtaining a reference count */ static inline bool nf_ct_should_gc(const struct nf_conn *ct) { - return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && - !nf_ct_is_dying(ct); + if (!nf_ct_is_confirmed(ct)) + return false; + + /* load ct->timeout after is_confirmed() test. + * Pairs with __nf_conntrack_confirm() which: + * 1. Increases ct->timeout value + * 2. Inserts ct into rcu hlist + * 3. Sets the confirmed bit + * 4. Unlocks the hlist lock + */ + smp_acquire__after_ctrl_dep(); + + return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct); } #define NF_CT_DAY (86400 * HZ) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index b63d53bb9dd6..1a6fca013165 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -369,7 +369,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) { - if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) return false; *inner_proto = __nf_flow_pppoe_proto(skb); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 471c353d32a4..757abcb54d11 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -442,6 +442,9 @@ struct nft_set_ext; * @remove: remove element from set * @walk: iterate over all set elements * @get: get set elements + * @ksize: kernel set size + * @usize: userspace set size + * @adjust_maxsize: delta to adjust maximum set size * @commit: commit set elements * @abort: abort set elements * @privsize: function to return size of set private data @@ -495,6 +498,9 @@ struct nft_set_ops { const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); + u32 (*ksize)(u32 size); + u32 (*usize)(u32 size); + u32 (*adjust_maxsize)(const struct nft_set *set); void (*commit)(struct nft_set *set); void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], @@ -1883,7 +1889,7 @@ void nft_chain_filter_fini(void); void __init nft_chain_route_init(void); void nft_chain_route_fini(void); -void nf_tables_trans_destroy_flush_work(void); +void nf_tables_trans_destroy_flush_work(struct net *net); int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input); @@ -1897,6 +1903,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head destroy_list; struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; @@ -1907,6 +1914,7 @@ struct nftables_pernet { unsigned int base_seq; unsigned int gc_seq; u8 validate_state; + struct work_struct destroy_work; }; extern unsigned int nf_tables_net_id; diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 38cae7113de4..7370fba844ef 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -2,6 +2,7 @@ #ifndef _NFT_FIB_H_ #define _NFT_FIB_H_ +#include <net/l3mdev.h> #include <net/netfilter/nf_tables.h> struct nft_fib { @@ -18,6 +19,35 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } +static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt) +{ + const struct net_device *indev = nft_in(pkt); + const struct sock *sk; + + switch (nft_hook(pkt)) { + case NF_INET_PRE_ROUTING: + case NF_INET_INGRESS: + case NF_INET_LOCAL_IN: + break; + default: + return false; + } + + sk = pkt->skb->sk; + if (sk && sk_fullsock(sk)) + return sk->sk_rx_dst_ifindex == indev->ifindex; + + return nft_fib_is_loopback(pkt->skb, indev); +} + +static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt, + const struct net_device *iif) +{ + const struct net_device *dev = iif ? iif : pkt->skb->dev; + + return l3mdev_master_ifindex_rcu(dev); +} + int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index ae60d6664095..23dd647fe024 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -43,6 +43,7 @@ struct netns_xfrm { struct hlist_head __rcu *state_bysrc; struct hlist_head __rcu *state_byspi; struct hlist_head __rcu *state_byseq; + struct hlist_head __percpu *state_cache_input; unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 793e6fd78bc5..60a5347922be 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -294,7 +294,7 @@ static inline long page_pool_unref_page(struct page *page, long nr) static inline void page_pool_ref_netmem(netmem_ref netmem) { - atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count); + atomic_long_inc(netmem_get_pp_ref_count_ref(netmem)); } static inline void page_pool_ref_page(struct page *page) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index c022c410abe3..f53e2c90b686 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include <linux/dma-direction.h> #include <linux/ptr_ring.h> #include <linux/types.h> +#include <linux/xarray.h> #include <net/netmem.h> #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA @@ -33,6 +34,9 @@ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM) +/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */ +#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1) + /* * Fast allocation side cache array/stack * @@ -216,6 +220,8 @@ struct page_pool { void *mp_priv; + struct xarray dma_mapped; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4880b3a7aced..4229e4fcd2a9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -75,11 +75,11 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block) } #ifdef CONFIG_NET_CLS_ACT -DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key); +DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key); static inline bool tcf_block_bypass_sw(struct tcf_block *block) { - return block && block->bypass_wanted; + return block && !atomic_read(&block->useswcnt); } #endif @@ -759,6 +759,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, cls_common->extack = extack; } +static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags) +{ + if (tp->usesw) + return; + if (tc_skip_sw(flags) && tc_in_hw(flags)) + return; + tp->usesw = true; +} + #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb) { diff --git a/include/net/route.h b/include/net/route.h index 1789f1e6640b..8a11d19f897b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -208,12 +208,13 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src, const struct sk_buff *hint); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) + dscp_t dscp, struct net_device *devin) { int err; rcu_read_lock(); - err = ip_route_input_noref(skb, dst, src, tos, devin); + err = ip_route_input_noref(skb, dst, src, inet_dscp_to_dsfield(dscp), + devin); if (!err) { skb_dst_force(skb); if (!skb_dst(skb)) @@ -363,10 +364,15 @@ static inline int inet_iif(const struct sk_buff *skb) static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - struct net *net = dev_net(dst->dev); - if (hoplimit == 0) + if (hoplimit == 0) { + const struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + rcu_read_unlock(); + } return hoplimit; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5d74fa7e694c..a9d7e9ecee6b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -425,6 +425,7 @@ struct tcf_proto { spinlock_t lock; bool deleting; bool counted; + bool usesw; refcount_t refcnt; struct rcu_head rcu; struct hlist_node destroy_ht_node; @@ -474,9 +475,7 @@ struct tcf_block { struct flow_block flow_block; struct list_head owner_list; bool keep_dst; - bool bypass_wanted; - atomic_t filtercnt; /* Number of filters */ - atomic_t skipswcnt; /* Number of skip_sw filters */ + atomic_t useswcnt; atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ @@ -852,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, - __u64 bytes, __u32 packets) + __u64 bytes, __u64 packets) { u64_stats_update_begin(&bstats->syncp); u64_stats_add(&bstats->bytes, bytes); @@ -1032,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) return skb; } +static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct) +{ + struct sk_buff *skb; + + skb = __skb_dequeue(&sch->gso_skb); + if (skb) { + sch->q.qlen--; + return skb; + } + if (direct) + return __qdisc_dequeue_head(&sch->q); + else + return sch->dequeue(sch); +} + static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 31248cfdfb23..dcd288fa1bb6 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,6 +775,7 @@ struct sctp_transport { /* Reference counting. */ refcount_t refcnt; + __u32 dead:1, /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -784,7 +785,7 @@ struct sctp_transport { * calculation completes (i.e. the DATA chunk * is SACK'd) clear this flag. */ - __u32 rto_pending:1, + rto_pending:1, /* * hb_sent : a flag that signals that we have a pending diff --git a/include/net/sock.h b/include/net/sock.h index fa055cf1785e..722f409cccd3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -338,6 +338,8 @@ struct sk_filter; * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. + * @sk_owner: reference to the real owner of the socket that calls + * sock_lock_init_class_and_name(). */ struct sock { /* @@ -544,6 +546,10 @@ struct sock { struct rcu_head sk_rcu; netns_tracker ns_tracker; struct xarray sk_user_frags; + +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) + struct module *sk_owner; +#endif }; struct sock_bh_locked { @@ -1585,6 +1591,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) sk_mem_reclaim(sk); } +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ + __module_get(owner); + sk->sk_owner = owner; +} + +static inline void sk_owner_clear(struct sock *sk) +{ + sk->sk_owner = NULL; +} + +static inline void sk_owner_put(struct sock *sk) +{ + module_put(sk->sk_owner); +} +#else +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ +} + +static inline void sk_owner_clear(struct sock *sk) +{ +} + +static inline void sk_owner_put(struct sock *sk) +{ +} +#endif /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. @@ -1594,13 +1629,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ + sk_owner_set(sk, THIS_MODULE); \ sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ - sizeof((sk)->sk_lock)); \ + sizeof((sk)->sk_lock)); \ lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ - (skey), (sname)); \ + (skey), (sname)); \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) @@ -1744,6 +1780,7 @@ static inline bool sock_allow_reclassification(const struct sock *csk) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_net_refcnt_upgrade(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); void sk_free_unlock_clone(struct sock *sk); @@ -2906,8 +2943,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { - if (sk->sk_prot->sock_is_readable) - return sk->sk_prot->sock_is_readable(sk); + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->sock_is_readable) + return prot->sock_is_readable(sk); + return false; } #endif /* _SOCK_H */ diff --git a/include/net/strparser.h b/include/net/strparser.h index 41e2ce9e9e10..0a83010b3a64 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -43,6 +43,8 @@ struct strparser; struct strp_callbacks { int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + int (*read_sock)(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); int (*read_sock_done)(struct strparser *strp, int err); void (*abort_parser)(struct strparser *strp, int err); void (*lock)(struct strparser *strp); diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h index f071c1d70a25..a04bcac7adf4 100644 --- a/include/net/tc_act/tc_ctinfo.h +++ b/include/net/tc_act/tc_ctinfo.h @@ -18,9 +18,9 @@ struct tcf_ctinfo_params { struct tcf_ctinfo { struct tc_action common; struct tcf_ctinfo_params __rcu *params; - u64 stats_dscp_set; - u64 stats_dscp_error; - u64 stats_cpmark_set; + atomic64_t stats_dscp_set; + atomic64_t stats_dscp_error; + atomic64_t stats_cpmark_set; }; enum { diff --git a/include/net/tcp.h b/include/net/tcp.h index d1948d357dad..3255a199ef60 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,6 +41,7 @@ #include <net/inet_ecn.h> #include <net/dst.h> #include <net/mptcp.h> +#include <net/xfrm.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -683,6 +684,19 @@ void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); void tcp_sack_compress_send_ack(struct sock *sk); +static inline void tcp_cleanup_skb(struct sk_buff *skb) +{ + skb_dst_drop(skb); + secpath_reset(skb); +} + +static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); + DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb)); + __skb_queue_tail(&sk->sk_receive_queue, skb); +} + /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) @@ -729,6 +743,9 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); +int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off); void tcp_read_done(struct sock *sk, size_t len); @@ -2595,6 +2612,11 @@ struct sk_psock; #ifdef CONFIG_BPF_SYSCALL int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); +#ifdef CONFIG_BPF_STREAM_PARSER +struct strparser; +int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); +#endif /* CONFIG_BPF_STREAM_PARSER */ #endif /* CONFIG_BPF_SYSCALL */ #ifdef CONFIG_INET diff --git a/include/net/udp.h b/include/net/udp.h index 61222545ab1c..0b2e3a5e01d8 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -461,6 +461,16 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, { netdev_features_t features = NETIF_F_SG; struct sk_buff *segs; + int drop_count; + + /* + * Segmentation in UDP receive path is only for UDP GRO, drop udp + * fragmentation offload (UFO) packets. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) { + drop_count = 1; + goto drop; + } /* Avoid csum recalculation by skb_segment unless userspace explicitly * asks for the final checksum values @@ -484,16 +494,18 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, */ segs = __skb_gso_segment(skb, features, false); if (IS_ERR_OR_NULL(segs)) { - int segs_nr = skb_shinfo(skb)->gso_segs; - - atomic_add(segs_nr, &sk->sk_drops); - SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); - kfree_skb(skb); - return NULL; + drop_count = skb_shinfo(skb)->gso_segs; + goto drop; } consume_skb(skb); return segs; + +drop: + atomic_add(drop_count, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count); + kfree_skb(skb); + return NULL; } static inline void udp_post_segment_fix_csum(struct sk_buff *skb) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index bfe625b55d55..df3f5f07bc7c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -71,9 +71,6 @@ struct xdp_sock { */ u32 tx_budget_spent; - /* Protects generic receive. */ - spinlock_t rx_lock; - /* Statistics */ u64 rx_dropped; u64 rx_queue_full; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a0bdd58f401c..1484dd15a369 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -184,10 +184,13 @@ struct xfrm_state { }; struct hlist_node byspi; struct hlist_node byseq; + struct hlist_node state_cache; + struct hlist_node state_cache_input; refcount_t refcnt; spinlock_t lock; + u32 pcpu_num; struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; @@ -230,7 +233,6 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ @@ -536,6 +538,7 @@ struct xfrm_policy_queue { * @xp_net: network namespace the policy lives in * @bydst: hlist node for SPD hash table or rbtree list * @byidx: hlist node for index hash table + * @state_cache_list: hlist head for policy cached xfrm states * @lock: serialize changes to policy structure members * @refcnt: reference count, freed once it reaches 0 * @pos: kernel internal tie-breaker to determine age of policy @@ -566,6 +569,8 @@ struct xfrm_policy { struct hlist_node bydst; struct hlist_node byidx; + struct hlist_head state_cache_list; + /* This lock only affects elements except for entry. */ rwlock_t lock; refcount_t refcnt; @@ -1217,9 +1222,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (xo) { x = xfrm_input_state(skb); - if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) - return (xo->flags & CRYPTO_DONE) && - (xo->status & CRYPTO_SUCCESS); + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { + bool check = (xo->flags & CRYPTO_DONE) && + (xo->status & CRYPTO_SUCCESS); + + /* The packets here are plain ones and secpath was + * needed to indicate that hardware already handled + * them and there is no need to do nothing in addition. + * + * Consume secpath which was set by drivers. + */ + secpath_reset(skb); + return check; + } } return __xfrm_check_nopolicy(net, skb, dir) || @@ -1645,6 +1660,10 @@ int xfrm_state_update(struct xfrm_state *x); struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); +struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, + const xfrm_address_t *daddr, + __be32 spi, u8 proto, + unsigned short family); struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -1684,7 +1703,7 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); @@ -1796,7 +1815,7 @@ int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi, struct netlink_ext_ack *extack); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, - u8 mode, u32 reqid, u32 if_id, u8 proto, + u8 mode, u32 reqid, u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned short family); diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index bacb33f1e3e5..823fd5c7a3b1 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -55,6 +55,8 @@ struct xsk_buff_pool { refcount_t users; struct xdp_umem *umem; struct work_struct work; + /* Protects generic receive in shared and non-shared umem mode. */ + spinlock_t rx_lock; struct list_head free_list; struct list_head xskb_list; u32 heads_cnt; |