diff options
Diffstat (limited to 'include/net')
61 files changed, 1735 insertions, 530 deletions
diff --git a/include/net/Space.h b/include/net/Space.h index c29f3d51c078..ef42629f4258 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -10,4 +10,3 @@ struct net_device *smc_init(int unit); struct net_device *cs89x0_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); -struct net_device *cops_probe(int unit); diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index b01cf9ac2437..e302c0e804d0 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -177,6 +177,9 @@ struct vsock_transport { /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); + + /* Zero-copy. */ + bool (*msgzerocopy_allow)(void); }; /**** CORE ****/ @@ -241,4 +244,8 @@ static inline void __init vsock_bpf_build_proto(void) {} #endif +static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) +{ + return t->msgzerocopy_allow && t->msgzerocopy_allow(); +} #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index aa90adc3b2a4..7ffa8c192c3f 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -541,7 +541,7 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, return ERR_PTR(-EFAULT); } - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); return skb; } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 87d92accc26e..bdee5d649cc6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1,6 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright 2023 NXP Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -673,6 +674,8 @@ enum { #define HCI_TX_POWER_INVALID 127 #define HCI_RSSI_INVALID 127 +#define HCI_SYNC_HANDLE_INVALID 0xffff + #define HCI_ROLE_MASTER 0x00 #define HCI_ROLE_SLAVE 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index e6359f7346f1..20988623c5cc 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -350,7 +350,9 @@ struct hci_dev { struct list_head list; struct mutex lock; - char name[8]; + struct ida unset_handle_ida; + + const char *name; unsigned long flags; __u16 id; __u8 bus; @@ -1290,8 +1292,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *hdev, - __u8 handle) +static inline struct hci_conn * +hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1299,22 +1301,22 @@ static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev * rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != ISO_LINK || + !test_bit(HCI_CONN_PA_SYNC, &c->flags)) continue; - if (handle != BT_ISO_QOS_BIG_UNSET && handle == c->iso_qos.bcast.big) { + if (c->iso_qos.bcast.big == big) { rcu_read_unlock(); return c; } } - rcu_read_unlock(); return NULL; } static inline struct hci_conn * -hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big) +hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1326,7 +1328,7 @@ hci_conn_hash_lookup_pa_sync(struct hci_dev *hdev, __u8 big) !test_bit(HCI_CONN_PA_SYNC, &c->flags)) continue; - if (c->iso_qos.bcast.big == big) { + if (c->sync_handle == sync_handle) { rcu_read_unlock(); return c; } @@ -1377,6 +1379,26 @@ static inline void hci_conn_hash_list_state(struct hci_dev *hdev, rcu_read_unlock(); } +static inline void hci_conn_hash_list_flag(struct hci_dev *hdev, + hci_conn_func_t func, __u8 type, + __u8 flag, void *data) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + if (!func) + return; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == type && test_bit(flag, &c->flags)) + func(c, data); + } + + rcu_read_unlock(); +} + static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; @@ -1426,7 +1448,9 @@ int hci_le_create_cis_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, - u8 role); + u8 role, u16 handle); +struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, + bdaddr_t *dst, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 2d5fcda1bcd0..082f89531b88 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -56,7 +56,7 @@ struct hci_mon_new_index { __u8 type; __u8 bus; bdaddr_t bdaddr; - char name[8]; + char name[8] __nonstring; } __packed; #define HCI_MON_NEW_INDEX_SIZE 16 diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 57eeb07aeb25..6efbc2152146 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -80,6 +80,8 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval); +int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance); + int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force); int hci_disable_advertising_sync(struct hci_dev *hdev); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3a4b684f89bf..b137a33a1b68 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -76,6 +76,8 @@ struct wiphy; * @IEEE80211_CHAN_DISABLED: This channel is disabled. * @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes * sending probe requests or beaconing. + * @IEEE80211_CHAN_PSD: Power spectral density (in dBm) is set for this + * channel. * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel * is not permitted. @@ -119,7 +121,7 @@ struct wiphy; enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, IEEE80211_CHAN_NO_IR = 1<<1, - /* hole at 1<<2 */ + IEEE80211_CHAN_PSD = 1<<2, IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, @@ -171,6 +173,7 @@ enum ieee80211_channel_flags { * on this channel. * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. + * @psd: power spectral density (in dBm) */ struct ieee80211_channel { enum nl80211_band band; @@ -187,6 +190,7 @@ struct ieee80211_channel { enum nl80211_dfs_state dfs_state; unsigned long dfs_state_entered; unsigned int dfs_cac_ms; + s8 psd; }; /** @@ -410,6 +414,19 @@ struct ieee80211_sta_eht_cap { u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN]; }; +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ +#ifdef __CHECKER__ +/* + * This is used to mark the sband->iftype_data pointer which is supposed + * to be an array with special access semantics (per iftype), but a lot + * of code got it wrong in the past, so with this marking sparse will be + * noisy when the pointer is used directly. + */ +# define __iftd __attribute__((noderef, address_space(__iftype_data))) +#else +# define __iftd +#endif /* __CHECKER__ */ + /** * struct ieee80211_sband_iftype_data - sband data per interface type * @@ -543,10 +560,48 @@ struct ieee80211_supported_band { struct ieee80211_sta_s1g_cap s1g_cap; struct ieee80211_edmg edmg_cap; u16 n_iftype_data; - const struct ieee80211_sband_iftype_data *iftype_data; + const struct ieee80211_sband_iftype_data __iftd *iftype_data; }; /** + * _ieee80211_set_sband_iftype_data - set sband iftype data array + * @sband: the sband to initialize + * @iftd: the iftype data array pointer + * @n_iftd: the length of the iftype data array + * + * Set the sband iftype data array; use this where the length cannot + * be derived from the ARRAY_SIZE() of the argument, but prefer + * ieee80211_set_sband_iftype_data() where it can be used. + */ +static inline void +_ieee80211_set_sband_iftype_data(struct ieee80211_supported_band *sband, + const struct ieee80211_sband_iftype_data *iftd, + u16 n_iftd) +{ + sband->iftype_data = (const void __iftd __force *)iftd; + sband->n_iftype_data = n_iftd; +} + +/** + * ieee80211_set_sband_iftype_data - set sband iftype data array + * @sband: the sband to initialize + * @iftd: the iftype data array + */ +#define ieee80211_set_sband_iftype_data(sband, iftd) \ + _ieee80211_set_sband_iftype_data(sband, iftd, ARRAY_SIZE(iftd)) + +/** + * for_each_sband_iftype_data - iterate sband iftype data entries + * @sband: the sband whose iftype_data array to iterate + * @i: iterator counter + * @iftd: iftype data pointer to set + */ +#define for_each_sband_iftype_data(sband, i, iftd) \ + for (i = 0, iftd = (const void __force *)&(sband)->iftype_data[i]; \ + i < (sband)->n_iftype_data; \ + i++, iftd = (const void __force *)&(sband)->iftype_data[i]) + +/** * ieee80211_get_sband_iftype_data - return sband data for a given iftype * @sband: the sband to search for the STA on * @iftype: enum nl80211_iftype @@ -557,6 +612,7 @@ static inline const struct ieee80211_sband_iftype_data * ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, u8 iftype) { + const struct ieee80211_sband_iftype_data *data; int i; if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) @@ -565,10 +621,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, if (iftype == NL80211_IFTYPE_AP_VLAN) iftype = NL80211_IFTYPE_AP; - for (i = 0; i < sband->n_iftype_data; i++) { - const struct ieee80211_sband_iftype_data *data = - &sband->iftype_data[i]; - + for_each_sband_iftype_data(sband, i, data) { if (data->types_mask & BIT(iftype)) return data; } @@ -954,6 +1007,30 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, enum nl80211_iftype iftype); /** + * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable and we + * can/need start CAC on such channel + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * + * Return: true if all channels available and at least + * one channel requires CAC (NL80211_DFS_USABLE) + */ +bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + +/** + * cfg80211_chandef_dfs_cac_time - get the DFS CAC time (in ms) for given + * channel definition + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * + * Returns: DFS CAC time (in ms) which applies for this channel definition + */ +unsigned int +cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef); + +/** * nl80211_send_chandef - sends the channel definition. * @msg: the msg to send channel definition * @chandef: the channel definition to check @@ -1289,6 +1366,7 @@ struct cfg80211_acl_data { * struct cfg80211_fils_discovery - FILS discovery parameters from * IEEE Std 802.11ai-2016, Annex C.3 MIB detail. * + * @update: Set to true if the feature configuration should be updated. * @min_interval: Minimum packet interval in TUs (0 - 10000) * @max_interval: Maximum packet interval in TUs (0 - 10000) * @tmpl_len: Template length @@ -1296,6 +1374,7 @@ struct cfg80211_acl_data { * frame headers. */ struct cfg80211_fils_discovery { + bool update; u32 min_interval; u32 max_interval; size_t tmpl_len; @@ -1306,6 +1385,7 @@ struct cfg80211_fils_discovery { * struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe * response parameters in 6GHz. * + * @update: Set to true if the feature configuration should be updated. * @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned * in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive * scanning @@ -1313,6 +1393,7 @@ struct cfg80211_fils_discovery { * @tmpl: Template data for probe response */ struct cfg80211_unsol_bcast_probe_resp { + bool update; u32 interval; size_t tmpl_len; const u8 *tmpl; @@ -1399,6 +1480,22 @@ struct cfg80211_ap_settings { u16 punct_bitmap; }; + +/** + * struct cfg80211_ap_update - AP configuration update + * + * Subset of &struct cfg80211_ap_settings, for updating a running AP. + * + * @beacon: beacon data + * @fils_discovery: FILS discovery transmission parameters + * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters + */ +struct cfg80211_ap_update { + struct cfg80211_beacon_data beacon; + struct cfg80211_fils_discovery fils_discovery; + struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; +}; + /** * struct cfg80211_csa_settings - channel switch settings * @@ -2346,7 +2443,7 @@ struct mesh_config { * @user_mpm: userspace handles all MPM functions * @dtim_period: DTIM period to use * @beacon_interval: beacon interval to use - * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] + * @mcast_rate: multicast rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh * @beacon_rate: bitrate to be used for beacons * @userspace_handles_dfs: whether user space controls DFS operation, i.e. @@ -2487,7 +2584,6 @@ struct cfg80211_scan_6ghz_params { * @n_ssids: number of SSIDs * @channels: channels to scan on. * @n_channels: total number of channels to scan - * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @duration: how long to listen on each channel, in TUs. If @@ -2517,7 +2613,6 @@ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; - enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u16 duration; @@ -2566,7 +2661,7 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied - * for filtering out scan results received. Drivers advertize this support + * for filtering out scan results received. Drivers advertise this support * of band specific rssi based filtering through the feature capability * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band * specific rssi thresholds take precedence over rssi_thold, if specified. @@ -2612,14 +2707,13 @@ struct cfg80211_bss_select_adjust { * @ssids: SSIDs to scan for (passed in the probe_reqs in active scans) * @n_ssids: number of SSIDs * @n_channels: total number of channels to scan - * @scan_width: channel width for scanning * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @flags: control flags from &enum nl80211_scan_flags * @match_sets: sets of parameters to be matched for a scan result * entry to be considered valid and to be passed to the host * (others are filtered out). - * If ommited, all results are passed. + * If omitted, all results are passed. * @n_match_sets: number of match sets * @report_results: indicates that results were reported for this request * @wiphy: the wiphy this was for @@ -2653,14 +2747,13 @@ struct cfg80211_bss_select_adjust { * to the specified band while deciding whether a better BSS is reported * using @relative_rssi. If delta is a negative number, the BSSs that * belong to the specified band will be penalized by delta dB in relative - * comparisions. + * comparisons. */ struct cfg80211_sched_scan_request { u64 reqid; struct cfg80211_ssid *ssids; int n_ssids; u32 n_channels; - enum nl80211_bss_scan_width scan_width; const u8 *ie; size_t ie_len; u32 flags; @@ -2708,7 +2801,6 @@ enum cfg80211_signal_type { /** * struct cfg80211_inform_bss - BSS inform data * @chan: channel the frame was received on - * @scan_width: scan width that was used * @signal: signal strength value, according to the wiphy's * signal type * @boottime_ns: timestamp (CLOCK_BOOTTIME) when the information was @@ -2728,7 +2820,6 @@ enum cfg80211_signal_type { */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; - enum nl80211_bss_scan_width scan_width; s32 signal; u64 boottime_ns; u64 parent_tsf; @@ -2762,7 +2853,6 @@ struct cfg80211_bss_ies { * for use in scan results and similar. * * @channel: channel this BSS is on - * @scan_width: width of the control channel * @bssid: BSSID of the BSS * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order @@ -2792,7 +2882,6 @@ struct cfg80211_bss_ies { */ struct cfg80211_bss { struct ieee80211_channel *channel; - enum nl80211_bss_scan_width scan_width; const struct cfg80211_bss_ies __rcu *ies; const struct cfg80211_bss_ies __rcu *beacon_ies; @@ -2891,12 +2980,15 @@ struct cfg80211_auth_request { * @elems_len: length of the elements * @disabled: If set this link should be included during association etc. but it * should not be used until enabled by the AP MLD. + * @error: per-link error code, must be <= 0. If there is an error, then the + * operation as a whole must fail. */ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; bool disabled; + int error; }; /** @@ -3495,7 +3587,7 @@ struct cfg80211_update_ft_ies_params { * This structure provides information needed to transmit a mgmt frame * * @chan: channel to use - * @offchan: indicates wether off channel operation is required + * @offchan: indicates whether off channel operation is required * @wait: duration for ROC * @buf: buffer to transmit * @len: buffer length @@ -3613,7 +3705,7 @@ struct cfg80211_nan_func_filter { * @publish_bcast: if true, the solicited publish should be broadcasted * @subscribe_active: if true, the subscribe is active * @followup_id: the instance ID for follow up - * @followup_reqid: the requestor instance ID for follow up + * @followup_reqid: the requester instance ID for follow up * @followup_dest: MAC address of the recipient of the follow up * @ttl: time to live counter in DW. * @serv_spec_info: Service Specific Info @@ -4450,7 +4542,7 @@ struct cfg80211_ops { int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *info); + struct cfg80211_ap_update *info); int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id); @@ -4816,6 +4908,8 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys. * @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for * NL80211_REGDOM_SET_BY_DRIVER. + * @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver + * set this flag to update channels on beacon hints. */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -4842,6 +4936,7 @@ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER = BIT(24), + WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON = BIT(25), }; /** @@ -5826,6 +5921,16 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work); */ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work); +/** + * wiphy_work_flush - flush previously queued work + * @wiphy: the wiphy, for debug purposes + * @work: the work to flush, this can be %NULL to flush all work + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work *work); + struct wiphy_delayed_work { struct wiphy_work work; struct wiphy *wiphy; @@ -5870,6 +5975,17 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); /** + * wiphy_delayed_work_flush - flush previously queued delayed work + * @wiphy: the wiphy, for debug purposes + * @dwork: the delayed work to flush + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_delayed_work_flush(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork); + +/** * struct wireless_dev - wireless device state * * For netdevs, this structure must be allocated by the driver @@ -5917,8 +6033,6 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, * @mgmt_registrations: list of registrations for management frames * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver - * @mtx: mutex used to lock data in this struct, may be used by drivers - * and some API functions require it held * @beacon_interval: beacon interval used on this device for transmitting * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL @@ -5941,6 +6055,7 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, * @event_lock: (private) lock for event list * @owner_nlportid: (private) owner socket port ID * @nl_owner_dead: (private) owner socket went away + * @cqm_rssi_work: (private) CQM RSSI reporting work * @cqm_config: (private) nl80211 RSSI monitor state * @pmsr_list: (private) peer measurement requests * @pmsr_lock: (private) peer measurements requests/results lock @@ -5964,8 +6079,6 @@ struct wireless_dev { struct list_head mgmt_registrations; u8 mgmt_registrations_need_update:1; - struct mutex mtx; - bool use_4addr, is_running, registered, registering; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -6013,7 +6126,8 @@ struct wireless_dev { } wext; #endif - struct cfg80211_cqm_config *cqm_config; + struct wiphy_work cqm_rssi_work; + struct cfg80211_cqm_config __rcu *cqm_config; struct list_head pmsr_list; spinlock_t pmsr_lock; @@ -6255,13 +6369,11 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, /** * ieee80211_mandatory_rates - get mandatory rates for a given band * @sband: the band to look for rates in - * @scan_width: width of the control channel * * This function returns a bitmap of the mandatory rates for the given * band, bits are set according to the rate position in the bitrates array. */ -u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, - enum nl80211_bss_scan_width scan_width); +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband); /* * Radiotap parsing functions -- for controlled injection support @@ -6602,7 +6714,7 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) * @ies: data consisting of IEs * @len: length of data * - * Return: %NULL if the etended element could not be found or if + * Return: %NULL if the extended element could not be found or if * the element is invalid (claims to be longer than the given * data) or if the byte array doesn't match; otherwise return the * requested element struct. @@ -6749,7 +6861,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); /** * regulatory_set_wiphy_regd - set regdom info for self managed drivers * @wiphy: the wireless device we want to process the regulatory domain on - * @rd: the regulatory domain informatoin to use for this wiphy + * @rd: the regulatory domain information to use for this wiphy * * Set the regulatory domain information for self-managed wiphys, only they * may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more @@ -6840,7 +6952,7 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy); * Regulatory self-managed driver can use it to proactively * * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. - * @freq: the freqency(in MHz) to be queried. + * @freq: the frequency (in MHz) to be queried. * @rule: pointer to store the wmm rule from the regulatory db. * * Self-managed wireless drivers can use this function to query @@ -6923,22 +7035,6 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, gfp_t gfp); static inline struct cfg80211_bss * __must_check -cfg80211_inform_bss_width_frame(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - struct ieee80211_mgmt *mgmt, size_t len, - s32 signal, gfp_t gfp) -{ - struct cfg80211_inform_bss data = { - .chan = rx_channel, - .scan_width = scan_width, - .signal = signal, - }; - - return cfg80211_inform_bss_frame_data(wiphy, &data, mgmt, len, gfp); -} - -static inline struct cfg80211_bss * __must_check cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, struct ieee80211_mgmt *mgmt, size_t len, @@ -6946,7 +7042,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, { struct cfg80211_inform_bss data = { .chan = rx_channel, - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .signal = signal, }; @@ -7049,26 +7144,6 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, gfp_t gfp); static inline struct cfg80211_bss * __must_check -cfg80211_inform_bss_width(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype, - const u8 *bssid, u64 tsf, u16 capability, - u16 beacon_interval, const u8 *ie, size_t ielen, - s32 signal, gfp_t gfp) -{ - struct cfg80211_inform_bss data = { - .chan = rx_channel, - .scan_width = scan_width, - .signal = signal, - }; - - return cfg80211_inform_bss_data(wiphy, &data, ftype, bssid, tsf, - capability, beacon_interval, ie, ielen, - gfp); -} - -static inline struct cfg80211_bss * __must_check cfg80211_inform_bss(struct wiphy *wiphy, struct ieee80211_channel *rx_channel, enum cfg80211_bss_frame_type ftype, @@ -7078,7 +7153,6 @@ cfg80211_inform_bss(struct wiphy *wiphy, { struct cfg80211_inform_bss data = { .chan = rx_channel, - .scan_width = NL80211_BSS_CHAN_WIDTH_20, .signal = signal, }; @@ -7163,19 +7237,6 @@ void cfg80211_bss_iter(struct wiphy *wiphy, void *data), void *iter_data); -static inline enum nl80211_bss_scan_width -cfg80211_chandef_to_scan_width(const struct cfg80211_chan_def *chandef) -{ - switch (chandef->width) { - case NL80211_CHAN_WIDTH_5: - return NL80211_BSS_CHAN_WIDTH_5; - case NL80211_CHAN_WIDTH_10: - return NL80211_BSS_CHAN_WIDTH_10; - default: - return NL80211_BSS_CHAN_WIDTH_20; - } -} - /** * cfg80211_rx_mlme_mgmt - notification of processed MLME management frame * @dev: network device @@ -7208,7 +7269,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); /** - * struct cfg80211_rx_assoc_resp - association response data + * struct cfg80211_rx_assoc_resp_data - association response data * @bss: the BSS that association was requested with, ownership of the pointer * moves to cfg80211 in the call to cfg80211_rx_assoc_resp() * @buf: (Re)Association Response frame (header + body) @@ -7223,7 +7284,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * @links.status: Set this (along with a BSS pointer) for links that * were rejected by the AP. */ -struct cfg80211_rx_assoc_resp { +struct cfg80211_rx_assoc_resp_data { const u8 *buf; size_t len; const u8 *req_ies; @@ -7231,7 +7292,7 @@ struct cfg80211_rx_assoc_resp { int uapsd_queues; const u8 *ap_mld_addr; struct { - const u8 *addr; + u8 addr[ETH_ALEN] __aligned(2); struct cfg80211_bss *bss; u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; @@ -7240,7 +7301,7 @@ struct cfg80211_rx_assoc_resp { /** * cfg80211_rx_assoc_resp - notification of processed association response * @dev: network device - * @data: association response data, &struct cfg80211_rx_assoc_resp + * @data: association response data, &struct cfg80211_rx_assoc_resp_data * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). @@ -7248,7 +7309,7 @@ struct cfg80211_rx_assoc_resp { * This function may sleep. The caller must hold the corresponding wdev's mutex. */ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_rx_assoc_resp *data); + struct cfg80211_rx_assoc_resp_data *data); /** * struct cfg80211_assoc_failure - association failure data @@ -7967,7 +8028,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * cfg80211_port_authorized - notify cfg80211 of successful security association * * @dev: network device - * @bssid: the BSSID of the AP + * @peer_addr: BSSID of the AP/P2P GO in case of STA/GC or STA/GC MAC address + * in case of AP/P2P GO * @td_bitmap: transition disable policy * @td_bitmap_len: Length of transition disable policy * @gfp: allocation flags @@ -7978,8 +8040,11 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * should be preceded with a call to cfg80211_connect_result(), * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to * indicate the 802.11 association. + * This function can also be called by AP/P2P GO driver that supports + * authentication offload. In this case the peer_mac passed is that of + * associated STA/GC. */ -void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, +void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr, const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp); /** @@ -8568,7 +8633,7 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * @link_id: the link ID for MLO, must be 0 for non-MLO * @punct_bitmap: the new puncturing bitmap * - * Caller must acquire wdev_lock, therefore must only be called from sleepable + * Caller must hold wiphy mutex, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, @@ -8808,6 +8873,18 @@ static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen, } /** + * ieee80211_fragment_element - fragment the last element in skb + * @skb: The skbuf that the element was added to + * @len_pos: Pointer to length of the element to fragment + * @frag_id: The element ID to use for fragments + * + * This function fragments all data after @len_pos, adding fragmentation + * elements with the given ID as appropriate. The SKB will grow in size + * accordingly. + */ +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); + +/** * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN * @wdev: the wireless device reporting the wakeup * @wakeup: the wakeup report @@ -9056,9 +9133,9 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, /** * cfg80211_assoc_comeback - notification of association that was - * temporarly rejected with a comeback + * temporarily rejected with a comeback * @netdev: network device - * @ap_addr: AP (MLD) address that rejected the assocation + * @ap_addr: AP (MLD) address that rejected the association * @timeout: timeout interval value TUs. * * this function may sleep. the caller must hold the corresponding wdev's mutex. diff --git a/include/net/devlink.h b/include/net/devlink.h index 29fd1b4ee654..9ac394bdfbe4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -150,6 +150,7 @@ struct devlink_port { struct devlink_rate *devlink_rate; struct devlink_linecard *linecard; + u32 rel_index; }; struct devlink_port_new_attrs { @@ -1697,6 +1698,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); +int devl_port_fn_devlink_set(struct devlink_port *devlink_port, + struct devlink *fn_devlink); struct devlink_rate * devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, struct devlink_rate *parent); @@ -1717,8 +1720,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); void devlink_linecard_activate(struct devlink_linecard *linecard); void devlink_linecard_deactivate(struct devlink_linecard *linecard); -void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, - struct devlink *nested_devlink); +int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink); int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -1851,36 +1854,36 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, const char *version_value, enum devlink_info_version_type version_type); -int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); -int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); -int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name); -int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); -int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name); -int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); - -int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); -int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); -int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len); - -int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value); -int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value); -int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value); -int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value); -int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value); -int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u32 value_len); +void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg); +void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name); +void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg); +void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name); +void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg); + +void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value); +void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value); +void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len); + +void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value); +void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value); +void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value); +void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value); +void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value); +void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u32 value_len); struct devlink_health_reporter * devl_port_health_reporter_create(struct devlink_port *port, @@ -1918,6 +1921,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, void devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter); +int devl_nested_devlink_set(struct devlink *devlink, + struct devlink *nested_devlink); bool devlink_is_reload_failed(const struct devlink *devlink); void devlink_remote_reload_actions_performed(struct devlink *devlink, enum devlink_reload_limit limit, diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a587e83fc169..3c70ad53a49c 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -20,9 +20,14 @@ FN(IP_NOPROTO) \ FN(SOCKET_RCVBUFF) \ FN(PROTO_MEM) \ + FN(TCP_AUTH_HDR) \ FN(TCP_MD5NOTFOUND) \ FN(TCP_MD5UNEXPECTED) \ FN(TCP_MD5FAILURE) \ + FN(TCP_AONOTFOUND) \ + FN(TCP_AOUNEXPECTED) \ + FN(TCP_AOKEYNOTFOUND) \ + FN(TCP_AOFAILURE) \ FN(SOCKET_BACKLOG) \ FN(TCP_FLAGS) \ FN(TCP_ZEROWINDOW) \ @@ -80,6 +85,7 @@ FN(IPV6_NDISC_BAD_OPTIONS) \ FN(IPV6_NDISC_NS_OTHERHOST) \ FN(QUEUE_PURGE) \ + FN(TC_ERROR) \ FNe(MAX) /** @@ -142,6 +148,11 @@ enum skb_drop_reason { */ SKB_DROP_REASON_PROTO_MEM, /** + * @SKB_DROP_REASON_TCP_AUTH_HDR: TCP-MD5 or TCP-AO hashes are met + * twice or set incorrectly. + */ + SKB_DROP_REASON_TCP_AUTH_HDR, + /** * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, * corresponding to LINUX_MIB_TCPMD5NOTFOUND */ @@ -157,6 +168,26 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_MD5FAILURE, /** + * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected, + * corresponding to LINUX_MIB_TCPAOREQUIRED + */ + SKB_DROP_REASON_TCP_AONOTFOUND, + /** + * @SKB_DROP_REASON_TCP_AOUNEXPECTED: TCP-AO hash is present and it + * was not expected, corresponding to LINUX_MIB_TCPAOKEYNOTFOUND + */ + SKB_DROP_REASON_TCP_AOUNEXPECTED, + /** + * @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown, + * corresponding to LINUX_MIB_TCPAOKEYNOTFOUND + */ + SKB_DROP_REASON_TCP_AOKEYNOTFOUND, + /** + * @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong, + * corresponding to LINUX_MIB_TCPAOBAD + */ + SKB_DROP_REASON_TCP_AOFAILURE, + /** * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( * see LINUX_MIB_TCPBACKLOGDROP) */ @@ -345,6 +376,8 @@ enum skb_drop_reason { SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST, /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */ SKB_DROP_REASON_QUEUE_PURGE, + /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. */ + SKB_DROP_REASON_TC_ERROR, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/dsa.h b/include/net/dsa.h index 0b9c6aa27047..82135fbdb1e6 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -102,11 +102,11 @@ struct dsa_device_ops { const char *name; enum dsa_tag_protocol proto; /* Some tagging protocols either mangle or shift the destination MAC - * address, in which case the DSA master would drop packets on ingress + * address, in which case the DSA conduit would drop packets on ingress * if what it understands out of the destination MAC address is not in * its RX filter. */ - bool promisc_on_master; + bool promisc_on_conduit; }; struct dsa_lag { @@ -236,12 +236,12 @@ struct dsa_bridge { }; struct dsa_port { - /* A CPU port is physically connected to a master device. - * A user port exposed to userspace has a slave device. + /* A CPU port is physically connected to a conduit device. A user port + * exposes a network device to user-space, called 'user' here. */ union { - struct net_device *master; - struct net_device *slave; + struct net_device *conduit; + struct net_device *user; }; /* Copy of the tagging protocol operations, for quicker access @@ -249,7 +249,7 @@ struct dsa_port { */ const struct dsa_device_ops *tag_ops; - /* Copies for faster access in master receive hot path */ + /* Copies for faster access in conduit receive hot path */ struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); @@ -281,9 +281,9 @@ struct dsa_port { u8 lag_tx_enabled:1; - /* Master state bits, valid only on CPU ports */ - u8 master_admin_up:1; - u8 master_oper_up:1; + /* conduit state bits, valid only on CPU ports */ + u8 conduit_admin_up:1; + u8 conduit_oper_up:1; /* Valid only on user ports */ u8 cpu_port_in_lag:1; @@ -303,7 +303,7 @@ struct dsa_port { struct list_head list; /* - * Original copy of the master netdev ethtool_ops + * Original copy of the conduit netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; @@ -452,10 +452,10 @@ struct dsa_switch { const struct dsa_switch_ops *ops; /* - * Slave mii_bus and devices for the individual ports. + * User mii_bus and devices for the individual ports. */ u32 phys_mii_mask; - struct mii_bus *slave_mii_bus; + struct mii_bus *user_mii_bus; /* Ageing Time limits in msecs */ unsigned int ageing_time_min; @@ -520,10 +520,10 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_UNUSED; } -static inline bool dsa_port_master_is_operational(struct dsa_port *dp) +static inline bool dsa_port_conduit_is_operational(struct dsa_port *dp) { - return dsa_port_is_cpu(dp) && dp->master_admin_up && - dp->master_oper_up; + return dsa_port_is_cpu(dp) && dp->conduit_admin_up && + dp->conduit_oper_up; } static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) @@ -713,12 +713,12 @@ static inline bool dsa_port_offloads_lag(struct dsa_port *dp, return dsa_port_lag_dev_get(dp) == lag->dev; } -static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp) +static inline struct net_device *dsa_port_to_conduit(const struct dsa_port *dp) { if (dp->cpu_port_in_lag) return dsa_port_lag_dev_get(dp->cpu_dp); - return dp->cpu_dp->master; + return dp->cpu_dp->conduit; } static inline @@ -732,7 +732,7 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) else if (dp->hsr_dev) return dp->hsr_dev; - return dp->slave; + return dp->user; } static inline struct net_device * @@ -834,9 +834,9 @@ struct dsa_switch_ops { int (*connect_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); - int (*port_change_master)(struct dsa_switch *ds, int port, - struct net_device *master, - struct netlink_ext_ack *extack); + int (*port_change_conduit)(struct dsa_switch *ds, int port, + struct net_device *conduit, + struct netlink_ext_ack *extack); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); @@ -969,6 +969,16 @@ struct dsa_switch_ops { struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); + + /* + * Notification for MAC address changes on user ports. Drivers can + * currently only veto operations. They should not use the method to + * program the hardware, since the operation is not rolled back in case + * of other errors. + */ + int (*port_set_mac_address)(struct dsa_switch *ds, int port, + const unsigned char *addr); + /* * Compatibility between device trees defining multiple CPU ports and * drivers which are not OK to use by default the numerically smallest @@ -1198,7 +1208,8 @@ struct dsa_switch_ops { * HSR integration */ int (*port_hsr_join)(struct dsa_switch *ds, int port, - struct net_device *hsr); + struct net_device *hsr, + struct netlink_ext_ack *extack); int (*port_hsr_leave)(struct dsa_switch *ds, int port, struct net_device *hsr); @@ -1222,11 +1233,11 @@ struct dsa_switch_ops { int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); /* - * DSA master tracking operations + * DSA conduit tracking operations */ - void (*master_state_change)(struct dsa_switch *ds, - const struct net_device *master, - bool operational); + void (*conduit_state_change)(struct dsa_switch *ds, + const struct net_device *conduit, + bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -1363,9 +1374,9 @@ static inline int dsa_switch_resume(struct dsa_switch *ds) #endif /* CONFIG_PM_SLEEP */ #if IS_ENABLED(CONFIG_NET_DSA) -bool dsa_slave_dev_check(const struct net_device *dev); +bool dsa_user_dev_check(const struct net_device *dev); #else -static inline bool dsa_slave_dev_check(const struct net_device *dev) +static inline bool dsa_user_dev_check(const struct net_device *dev) { return false; } diff --git a/include/net/dsa_stubs.h b/include/net/dsa_stubs.h index 361811750a54..6f384897f287 100644 --- a/include/net/dsa_stubs.h +++ b/include/net/dsa_stubs.h @@ -13,14 +13,14 @@ extern const struct dsa_stubs *dsa_stubs; struct dsa_stubs { - int (*master_hwtstamp_validate)(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack); + int (*conduit_hwtstamp_validate)(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); }; -static inline int dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) +static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { if (!netdev_uses_dsa(dev)) return 0; @@ -29,18 +29,18 @@ static inline int dsa_master_hwtstamp_validate(struct net_device *dev, * netdev_uses_dsa() returns true, the dsa_core module is still * registered, and so, dsa_unregister_stubs() couldn't have run. * For netdev_uses_dsa() to start returning false, it would imply that - * dsa_master_teardown() has executed, which requires rtnl_lock(). + * dsa_conduit_teardown() has executed, which requires rtnl_lock(). */ ASSERT_RTNL(); - return dsa_stubs->master_hwtstamp_validate(dev, config, extack); + return dsa_stubs->conduit_hwtstamp_validate(dev, config, extack); } #else -static inline int dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) +static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return 0; } diff --git a/include/net/dst.h b/include/net/dst.h index 78884429deed..f5dfc8fb7b37 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -222,13 +222,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr return msecs_to_jiffies(dst_metric(dst, metric)); } -static inline u32 -dst_allfrag(const struct dst_entry *dst) -{ - int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG); - return ret; -} - static inline int dst_metric_locked(const struct dst_entry *dst, int metric) { @@ -392,10 +385,10 @@ static inline int dst_discard(struct sk_buff *skb) { return dst_discard_out(&init_net, skb->sk, skb); } -void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags); struct dst_entry *dst_destroy(struct dst_entry *dst); void dst_dev_put(struct dst_entry *dst); diff --git a/include/net/flow.h b/include/net/flow.h index 7f0adda3bf2f..335bbc52171c 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -40,8 +40,8 @@ struct flowi_common { #define FLOWI_FLAG_KNOWN_NH 0x02 __u32 flowic_secid; kuid_t flowic_uid; - struct flowi_tunnel flowic_tun_key; __u32 flowic_multipath_hash; + struct flowi_tunnel flowic_tun_key; }; union flowi_uli { diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 9efa9a59e81f..314087a5e181 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -333,7 +333,7 @@ struct flow_action_entry { struct flow_action { unsigned int num_entries; - struct flow_action_entry entries[]; + struct flow_action_entry entries[] __counted_by(num_entries); }; static inline bool flow_action_has_entries(const struct flow_action *action) diff --git a/include/net/gro.h b/include/net/gro.h index 88644b3ca660..b435f0ddbf64 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -41,7 +41,7 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* Used in ipv6_gro_receive() and foo-over-udp */ + /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ u16 proto; /* Used in napi_gro_cb::free */ diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 2338f8d2a8b3..925bac726a92 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -539,6 +539,12 @@ enum ieee80211_radiotap_eht_usig_common { IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK = 0x00000080, IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_20MHZ = 0, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_40MHZ = 1, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_80MHZ = 2, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_160MHZ = 3, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_1 = 4, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_2 = 5, IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR = 0x01f80000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP = 0xfe000000, diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index c8490729b4ae..3e454c4d7ba6 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -89,7 +89,7 @@ struct ip6_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - struct in6_addr sl_addr[]; + struct in6_addr sl_addr[] __counted_by(sl_max); }; #define IP6_SFBLOCK 10 /* allocate this many at once */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5d2fcc137b88..d0a2f827d5f2 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -44,7 +44,6 @@ struct inet_connection_sock_af_ops { struct request_sock *req_unhash, bool *own_req); u16 net_header_len; - u16 net_frag_header_len; u16 sockaddr_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); @@ -114,7 +113,10 @@ struct inet_connection_sock { __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 retry; /* Number of attempts */ - __u32 ato; /* Predicted tick of soft clock */ + #define ATO_BITS 8 + __u32 ato:ATO_BITS, /* Predicted tick of soft clock */ + lrcv_flowlabel:20, /* last received ipv6 flowlabel */ + unused:4; unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ @@ -325,11 +327,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 1 - static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { - inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; + inet_csk(sk)->icsk_ack.pingpong = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); } static inline void inet_csk_exit_pingpong_mode(struct sock *sk) @@ -339,7 +340,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk) static inline bool inet_csk_in_pingpong_mode(struct sock *sk) { - return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; + return inet_csk(sk)->icsk_ack.pingpong >= + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); +} + +static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ack.pingpong < U8_MAX) + icsk->icsk_ack.pingpong++; } static inline bool inet_csk_has_ulp(const struct sock *sk) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 2de0e4d4a027..74db6d97cae1 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -244,7 +244,6 @@ struct inet_sock { }; #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ -#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ enum { INET_FLAGS_PKTINFO = 0, @@ -268,6 +267,16 @@ enum { INET_FLAGS_NODEFRAG = 17, INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, INET_FLAGS_DEFER_CONNECT = 19, + INET_FLAGS_MC6_LOOP = 20, + INET_FLAGS_RECVERR6_RFC4884 = 21, + INET_FLAGS_MC6_ALL = 22, + INET_FLAGS_AUTOFLOWLABEL_SET = 23, + INET_FLAGS_AUTOFLOWLABEL = 24, + INET_FLAGS_DONTFRAG = 25, + INET_FLAGS_RECVERR6 = 26, + INET_FLAGS_REPFLOW = 27, + INET_FLAGS_RTALERT_ISOLATE = 28, + INET_FLAGS_SNDFLOW = 29, }; /* cmsg flags for inet */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 4a8e578405cb..b14999ff55db 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -67,7 +67,8 @@ struct inet_timewait_sock { /* And these are ours. */ unsigned int tw_transparent : 1, tw_flowlabel : 20, - tw_pad : 3, /* 3 bits hole */ + tw_usec_ts : 1, + tw_pad : 2, /* 2 bits hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; diff --git a/include/net/ip.h b/include/net/ip.h index 3489a1cca5e7..1fc4c8d69e33 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -258,7 +258,7 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { - return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos); + return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); } /* datagram.c */ @@ -434,19 +434,22 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) static inline bool ip_sk_accept_pmtu(const struct sock *sk) { - return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE && - inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); + + return pmtudisc != IP_PMTUDISC_INTERFACE && + pmtudisc != IP_PMTUDISC_OMIT; } static inline bool ip_sk_use_pmtu(const struct sock *sk) { - return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; + return READ_ONCE(inet_sk(sk)->pmtudisc) < IP_PMTUDISC_PROBE; } static inline bool ip_sk_ignore_df(const struct sock *sk) { - return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO || - inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc); + + return pmtudisc < IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_OMIT; } static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b32539bb0fb0..28b065790261 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -53,13 +53,12 @@ struct route_info { */ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { - /* No need to bitmask because srcprefs have only 3 bits. */ - return srcprefs << 3; + return (srcprefs & IPV6_PREFER_SRC_MASK) << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { - return (flags >> 3) & 7; + return (flags >> 3) & IPV6_PREFER_SRC_MASK; } static inline bool rt6_need_strict(const struct in6_addr *daddr) @@ -266,7 +265,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; - if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { + if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(dst->dev->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { @@ -277,14 +276,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && - inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc != IPV6_PMTUDISC_INTERFACE && + pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || - inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc < IPV6_PMTUDISC_DO || + pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f0c13864180e..d4667b7797e3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -154,9 +154,10 @@ struct fib_info { int fib_nhs; bool fib_nh_is_v6; bool nh_updated; + bool pfsrc_removed; struct nexthop *nh; struct rcu_head rcu; - struct fib_nh fib_nh[]; + struct fib_nh fib_nh[] __counted_by(fib_nhs); }; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c6932d1a3fa8..78d38dd88aba 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6) } static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, - const struct ipv6_pinfo *np) + const struct sock *sk) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, - .tclass = np->tclass, - .dontfrag = np->dontfrag, + .tclass = inet6_sk(sk)->tclass, + .dontfrag = inet6_test_bit(DONTFRAG, sk), }; } @@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); +bool ip6_autoflowlabel(struct net *net, const struct sock *sk); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { @@ -914,9 +914,9 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, int hlimit; if (ipv6_addr_is_multicast(&fl6->daddr)) - hlimit = np->mcast_hops; + hlimit = READ_ONCE(np->mcast_hops); else - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); return hlimit; @@ -1133,12 +1133,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); -struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, struct socket *sock, - struct in6_addr *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); @@ -1303,15 +1297,16 @@ static inline int ip6_sock_set_v6only(struct sock *sk) static inline void ip6_sock_set_recverr(struct sock *sk) { - lock_sock(sk); - inet6_sk(sk)->recverr = true; - release_sock(sk); + inet6_set_bit(RECVERR6, sk); } -static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) +#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \ + IPV6_PREFER_SRC_COA) + +static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) { + unsigned int prefmask = ~IPV6_PREFER_SRC_MASK; unsigned int pref = 0; - unsigned int prefmask = ~0; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC | @@ -1361,20 +1356,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) return -EINVAL; } - inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref; + WRITE_ONCE(inet6_sk(sk)->srcprefs, + (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref); return 0; } -static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) -{ - int ret; - - lock_sock(sk); - ret = __ip6_sock_set_addr_preferences(sk, val); - release_sock(sk); - return ret; -} - static inline void ip6_sock_set_recvpktinfo(struct sock *sk) { lock_sock(sk); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..485c39a89866 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -60,6 +60,9 @@ struct ipv6_stub { #if IS_ENABLED(CONFIG_XFRM) void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); + struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk, + struct list_head *head, + struct sk_buff *skb); int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); #endif @@ -85,6 +88,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7c707358d15c..580781ff9dcf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -79,7 +79,7 @@ * helpers for sanity checking. Drivers must ensure all work added onto the * mac80211 workqueue should be cancelled on the driver stop() callback. * - * mac80211 will flushed the workqueue upon interface removal and during + * mac80211 will flush the workqueue upon interface removal and during * suspend. * * All work performed on the mac80211 workqueue must not acquire the RTNL lock. @@ -138,7 +138,7 @@ * field to the frame RX timestamp and report the ack TX timestamp in the * ieee80211_rx_status struct. * - * Similarly, To report hardware timestamps for Timing Measurement or Fine + * Similarly, to report hardware timestamps for Timing Measurement or Fine * Timing Measurement frame TX, the driver should set the SKB's hwtstamp field * to the frame TX timestamp and report the ack RX timestamp in the * ieee80211_tx_status struct. @@ -341,6 +341,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response * status changed. * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. + * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -376,6 +377,7 @@ enum ieee80211_bss_change { BSS_CHANGED_FILS_DISCOVERY = 1<<30, BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), + BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -643,9 +645,7 @@ struct ieee80211_fils_discovery { * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS - * @csa_active: marks whether a channel switch is going on. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. + * @csa_active: marks whether a channel switch is going on. * @csa_punct_bitmap: new puncturing bitmap for channel switch * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL @@ -653,9 +653,7 @@ struct ieee80211_fils_discovery { * path needing to access it; even though the netdev carrier will always * be off when it is %NULL there can still be races and packets could be * processed after it switches back to %NULL. - * @color_change_active: marks whether a color change is ongoing. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. + * @color_change_active: marks whether a color change is ongoing. * @color_change_color: the bss color that will be used after the change. * @ht_ldpc: in AP mode, indicates interface has HT LDPC capability. * @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability. @@ -1082,6 +1080,11 @@ struct ieee80211_tx_rate { #define IEEE80211_MAX_TX_RETRY 31 +static inline bool ieee80211_rate_valid(struct ieee80211_tx_rate *rate) +{ + return rate->idx >= 0 && rate->count > 0; +} + static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate, u8 mcs, u8 nss) { @@ -1115,7 +1118,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) * not valid if the interface is an MLD since we won't know which * link the frame will be transmitted on * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC - * @ack_frame_id: internal frame ID for TX status, used internally + * @status_data: internal data for TX status handling, assigned privately, + * see also &enum ieee80211_status_data for the internal documentation + * @status_data_idr: indicates status data is IDR allocated ID for ack frame * @tx_time_est: TX time estimate in units of 4us, used internally * @control: union part for control data * @control.rates: TX rates array to try @@ -1155,10 +1160,11 @@ struct ieee80211_tx_info { /* common information */ u32 flags; u32 band:3, - ack_frame_id:13, + status_data_idr:1, + status_data:13, hw_queue:4, tx_time_est:10; - /* 2 free bits */ + /* 1 free bit */ union { struct { @@ -1172,7 +1178,11 @@ struct ieee80211_tx_info { u8 use_cts_prot:1; u8 short_preamble:1; u8 skip_table:1; - /* 2 bytes free */ + + /* for injection only (bitmap) */ + u8 antennas:2; + + /* 14 bits free */ }; /* only needed before rate control */ unsigned long jiffies; @@ -1757,15 +1767,15 @@ struct ieee80211_channel_switch { * @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes * and send P2P_PS notification to the driver if NOA changed, even * this is not pure P2P vif. - * @IEEE80211_VIF_DISABLE_SMPS_OVERRIDE: disable user configuration of - * SMPS mode via debugfs. + * @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is + * enabled for the interface. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), - IEEE80211_VIF_DISABLE_SMPS_OVERRIDE = BIT(4), + IEEE80211_VIF_EML_ACTIVE = BIT(4), }; @@ -1938,7 +1948,7 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif) for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ if ((!(vif)->active_links || \ (vif)->active_links & BIT(link_id)) && \ - (link = rcu_dereference((vif)->link_conf[link_id]))) + (link = link_conf_dereference_check(vif, link_id))) static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) { @@ -1971,22 +1981,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); */ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif); -/** - * lockdep_vif_mutex_held - for lockdep checks on link poiners - * @vif: the interface to check - */ -static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif) +static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif) { - return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx); + return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->wiphy->mtx); } #define link_conf_dereference_protected(vif, link_id) \ rcu_dereference_protected((vif)->link_conf[link_id], \ - lockdep_vif_mutex_held(vif)) + lockdep_vif_wiphy_mutex_held(vif)) #define link_conf_dereference_check(vif, link_id) \ rcu_dereference_check((vif)->link_conf[link_id], \ - lockdep_vif_mutex_held(vif)) + lockdep_vif_wiphy_mutex_held(vif)) /** * enum ieee80211_key_flags - key flags @@ -2393,7 +2399,7 @@ static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ if ((!(vif)->active_links || \ (vif)->active_links & BIT(link_id)) && \ - ((link_sta) = link_sta_dereference_protected(sta, link_id))) + ((link_sta) = link_sta_dereference_check(sta, link_id))) /** * enum sta_notify_cmd - sta notify command @@ -3056,7 +3062,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * The set_key() call for the %SET_KEY command should return 0 if * the key is now in use, -%EOPNOTSUPP or -%ENOSPC if it couldn't be * added; if you return 0 then hw_key_idx must be assigned to the - * hardware key index, you are free to use the full u8 range. + * hardware key index. You are free to use the full u8 range. * * Note that in the case that the @IEEE80211_HW_SW_CRYPTO_CONTROL flag is * set, mac80211 will not automatically fall back to software crypto if @@ -3066,7 +3072,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * When the cmd is %DISABLE_KEY then it must succeed. * * Note that it is permissible to not decrypt a frame even if a key - * for it has been uploaded to hardware, the stack will not make any + * for it has been uploaded to hardware. The stack will not make any * decision based on whether a key has been uploaded or not but rather * based on the receive flags. * @@ -3081,7 +3087,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * The update_tkip_key() call updates the driver with the new phase 1 key. * This happens every time the iv16 wraps around (every 65536 packets). The * set_key() call will happen only once for each key (unless the AP did - * rekeying), it will not include a valid phase 1 key. The valid phase 1 key is + * rekeying); it will not include a valid phase 1 key. The valid phase 1 key is * provided by update_tkip_key only. The trigger that makes mac80211 call this * handler is software decryption with wrap around of iv16. * @@ -3108,7 +3114,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * * mac80211 has support for various powersave implementations. * - * First, it can support hardware that handles all powersaving by itself, + * First, it can support hardware that handles all powersaving by itself; * such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS hardware * flag. In that case, it will be told about the desired powersave mode * with the %IEEE80211_CONF_PS flag depending on the association status. @@ -3133,12 +3139,12 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still * required to pass up beacons. The hardware is still required to handle * waking up for multicast traffic; if it cannot the driver must handle that - * as best as it can, mac80211 is too slow to do that. + * as best as it can; mac80211 is too slow to do that. * * Dynamic powersave is an extension to normal powersave in which the * hardware stays awake for a user-specified period of time after sending a * frame so that reply frames need not be buffered and therefore delayed to - * the next wakeup. It's compromise of getting good enough latency when + * the next wakeup. It's a compromise of getting good enough latency when * there's data traffic and still saving significantly power in idle * periods. * @@ -3203,7 +3209,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * Note that change, for the sake of simplification, also includes information * elements appearing or disappearing from the beacon. * - * Some hardware supports an "ignore list" instead, just make sure nothing + * Some hardware supports an "ignore list" instead. Just make sure nothing * that was requested is on the ignore list, and include commonly changing * information element IDs in the ignore list, for example 11 (BSS load) and * the various vendor-assigned IEs with unknown contents (128, 129, 133-136, @@ -3214,7 +3220,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * In addition to these capabilities, hardware should support notifying the * host of changes in the beacon RSSI. This is relevant to implement roaming * when no traffic is flowing (when traffic is flowing we see the RSSI of - * the received data packets). This can consist in notifying the host when + * the received data packets). This can consist of notifying the host when * the RSSI changes significantly or when it drops below or rises above * configurable thresholds. In the future these thresholds will also be * configured by mac80211 (which gets them from userspace) to implement @@ -3361,8 +3367,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * period starts for any reason, @release_buffered_frames is called * with the number of frames to be released and which TIDs they are * to come from. In this case, the driver is responsible for setting - * the EOSP (for uAPSD) and MORE_DATA bits in the released frames, - * to help the @more_data parameter is passed to tell the driver if + * the EOSP (for uAPSD) and MORE_DATA bits in the released frames. + * To help the @more_data parameter is passed to tell the driver if * there is more data on other TIDs -- the TIDs to release frames * from are ignored since mac80211 doesn't know how many frames the * buffers for those TIDs contain. @@ -3411,7 +3417,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * Additionally, the driver has to then use these HW queue IDs for the queue * management functions (ieee80211_stop_queue() et al.) * - * The driver is free to set up the queue mappings as needed, multiple virtual + * The driver is free to set up the queue mappings as needed; multiple virtual * interfaces may map to the same hardware queues if needed. The setup has to * happen during add_interface or change_interface callbacks. For example, a * driver supporting station+station and station+AP modes might decide to have @@ -3635,11 +3641,14 @@ enum ieee80211_reconfig_type { * @success: whether the frame exchange was successful, only * used with the mgd_complete_tx() method, and then only * valid for auth and (re)assoc. + * @link_id: the link id on which the frame will be TX'ed. + * Only used with the mgd_prepare_tx() method. */ struct ieee80211_prep_tx_info { u16 duration; u16 subtype; u8 success:1; + int link_id; }; /** @@ -3863,6 +3872,10 @@ struct ieee80211_prep_tx_info { * the station. See @sta_pre_rcu_remove if needed. * This callback can sleep. * + * @vif_add_debugfs: Drivers can use this callback to add a debugfs vif + * directory with its files. This callback should be within a + * CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep. + * * @link_add_debugfs: Drivers can use this callback to add debugfs files * when a link is added to a mac80211 vif. This callback should be within * a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep. @@ -4067,11 +4080,15 @@ struct ieee80211_prep_tx_info { * This callback must be atomic. * * @get_et_sset_count: Ethtool API to get string-set count. + * Note that the wiphy mutex is not held for this callback since it's + * expected to return a static value. * * @get_et_stats: Ethtool API to get a set of u64 stats. * * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. + * Note that the wiphy mutex is not held for this callback since it's + * expected to return a static value. * * @mgd_prepare_tx: Prepare for transmitting a management frame for association * before associated. In multi-channel scenarios, a virtual interface is @@ -4358,6 +4375,8 @@ struct ieee80211_ops { int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); #ifdef CONFIG_MAC80211_DEBUGFS + void (*vif_add_debugfs)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); void (*link_add_debugfs)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, @@ -4506,7 +4525,8 @@ struct ieee80211_ops { struct ieee80211_prep_tx_info *info); void (*mgd_protect_tdls_discover)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); int (*add_chanctx)(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx); @@ -4544,7 +4564,8 @@ struct ieee80211_ops { struct ieee80211_channel_switch *ch_switch); int (*post_channel_switch)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); void (*abort_channel_switch)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw, @@ -4890,7 +4911,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw); * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * This function must be called with BHs disabled and RCU read lock * @@ -4915,7 +4936,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta, * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * This function must be called with BHs disabled. * @@ -4940,7 +4961,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *sta, * for a single hardware must be synchronized against each other. Calls to * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be * mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * In process context use instead ieee80211_rx_ni(). * @@ -4960,7 +4981,7 @@ static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) * * Calls to this function, ieee80211_rx() or ieee80211_rx_ni() may not * be mixed for a single hardware.Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call @@ -4975,7 +4996,7 @@ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); * * Calls to this function, ieee80211_rx() and ieee80211_rx_irqsafe() may * not be mixed for a single hardware. Must not run concurrently with - * ieee80211_tx_status() or ieee80211_tx_status_ni(). + * ieee80211_tx_status_skb() or ieee80211_tx_status_ni(). * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call @@ -5151,7 +5172,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, struct ieee80211_tx_info *info); /** - * ieee80211_tx_status - transmit status callback + * ieee80211_tx_status_skb - transmit status callback * * Call this function for all transmitted frames after they have been * transmitted. It is permissible to not call this function for @@ -5166,13 +5187,13 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw, * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call */ -void ieee80211_tx_status(struct ieee80211_hw *hw, - struct sk_buff *skb); +void ieee80211_tx_status_skb(struct ieee80211_hw *hw, + struct sk_buff *skb); /** * ieee80211_tx_status_ext - extended transmit status callback * - * This function can be used as a replacement for ieee80211_tx_status + * This function can be used as a replacement for ieee80211_tx_status_skb() * in drivers that may want to provide extra information that does not * fit into &struct ieee80211_tx_info. * @@ -5189,7 +5210,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, /** * ieee80211_tx_status_noskb - transmit status callback without skb * - * This function can be used as a replacement for ieee80211_tx_status + * This function can be used as a replacement for ieee80211_tx_status_skb() * in drivers that cannot reliably map tx status information back to * specific skbs. * @@ -5217,9 +5238,9 @@ static inline void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, /** * ieee80211_tx_status_ni - transmit status callback (in process context) * - * Like ieee80211_tx_status() but can be called in process context. + * Like ieee80211_tx_status_skb() but can be called in process context. * - * Calls to this function, ieee80211_tx_status() and + * Calls to this function, ieee80211_tx_status_skb() and * ieee80211_tx_status_irqsafe() may not be mixed * for a single hardware. * @@ -5230,17 +5251,17 @@ static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw, struct sk_buff *skb) { local_bh_disable(); - ieee80211_tx_status(hw, skb); + ieee80211_tx_status_skb(hw, skb); local_bh_enable(); } /** * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback * - * Like ieee80211_tx_status() but can be called in IRQ context + * Like ieee80211_tx_status_skb() but can be called in IRQ context * (internally defers to a tasklet.) * - * Calls to this function, ieee80211_tx_status() and + * Calls to this function, ieee80211_tx_status_skb() and * ieee80211_tx_status_ni() may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by @@ -6542,11 +6563,14 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); * ieee80211_chswitch_done - Complete channel switch process * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @success: make the channel switch successful or not + * @link_id: the link_id on which the switch was done. Ignored if success is + * false. * * Complete the channel switch post-process: set the new operational channel * and wake up the suspended queues. */ -void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, + unsigned int link_id); /** * ieee80211_channel_switch_disconnect - disconnect due to channel switch error @@ -7242,7 +7266,7 @@ ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, * * This function is used to check whether given txq is allowed to transmit by * the airtime scheduler, and can be used by drivers to access the airtime - * fairness accounting without going using the scheduling order enfored by + * fairness accounting without using the scheduling order enforced by * next_txq(). * * Returns %true if the airtime scheduler thinks the TXQ should be allowed to diff --git a/include/net/macsec.h b/include/net/macsec.h index 75a6f4863c83..ebf9bc54036a 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -258,6 +258,7 @@ struct macsec_context { struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct { + bool update_pn; unsigned char assoc_num; u8 key[MACSEC_MAX_KEY_LEN]; union { diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h index 3d3b5c881bc1..158b125692c2 100644 --- a/include/net/mana/hw_channel.h +++ b/include/net/mana/hw_channel.h @@ -121,7 +121,7 @@ struct hwc_dma_buf { u32 gpa_mkey; u32 num_reqs; - struct hwc_work_request reqs[]; + struct hwc_work_request reqs[] __counted_by(num_reqs); }; typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id, diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9f70b4332238..6e3e9c1363db 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -103,9 +103,10 @@ struct mana_txq { /* skb data and frags dma mappings */ struct mana_skb_head { - dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + /* GSO pkts may have 2 SGEs for the linear part*/ + dma_addr_t dma_handle[MAX_SKB_FRAGS + 2]; - u32 size[MAX_SKB_FRAGS + 1]; + u32 size[MAX_SKB_FRAGS + 2]; }; #define MANA_HEADROOM sizeof(struct mana_skb_head) @@ -338,7 +339,7 @@ struct mana_rxq { /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. */ - struct mana_recv_buf_oob rx_oobs[]; + struct mana_recv_buf_oob rx_oobs[] __counted_by(num_rx_buf); }; struct mana_tx_qp { diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6da68886fabb..07022bb0d44d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -539,7 +539,7 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - return n->output(n, skb); + return READ_ONCE(n->output)(n, skb); } static inline struct neighbour * diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index eb6cd43b1746..13b3a4e29fdb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -368,21 +368,30 @@ static inline void put_net_track(struct net *net, netns_tracker *tracker) typedef struct { #ifdef CONFIG_NET_NS - struct net *net; + struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS - pnet->net = net; + rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS - return pnet->net; + return rcu_dereference_protected(pnet->net, true); +#else + return &init_net; +#endif +} + +static inline struct net *read_pnet_rcu(possible_net_t *pnet) +{ +#ifdef CONFIG_NET_NS + return rcu_dereference(pnet->net); #else return &init_net; #endif diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4085765c3370..cba3ccf03fcc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -160,10 +160,6 @@ static inline struct net *nf_ct_net(const struct nf_conn *ct) return read_pnet(&ct->ct_net); } -/* Alter reply tuple (maybe alter helper). */ -void nf_conntrack_alter_reply(struct nf_conn *ct, - const struct nf_conntrack_tuple *newreply); - /* Is this tuple taken? (ignoring any belonging to the given conntrack). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, @@ -284,6 +280,16 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +static inline void nf_conntrack_alter_reply(struct nf_conn *ct, + const struct nf_conntrack_tuple *newreply) +{ + /* Must be unconfirmed, so not in hash table yet */ + if (WARN_ON(nf_ct_is_confirmed(ct))) + return; + + ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; +} + #define nfct_time_stamp ((u32)(jiffies)) /* jiffies until ct expires, 0 if already expired */ diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h index 078d3c52c03f..e5f2f0b73a9a 100644 --- a/include/net/netfilter/nf_conntrack_act_ct.h +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -20,7 +20,22 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf #endif } -static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +static inline struct +nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { #if IS_ENABLED(CONFIG_NET_ACT_CT) struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); @@ -29,22 +44,11 @@ static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn * return act_ct; act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); return act_ct; #else return NULL; #endif } -static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ -#if IS_ENABLED(CONFIG_NET_ACT_CT) - struct nf_conn_act_ct_ext *act_ct_ext; - - act_ct_ext = nf_conn_act_ct_ext_find(ct); - if (dev_net(skb->dev) == &init_net && act_ct_ext) - act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; -#endif -} - #endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index fcb19a4e8f2b..6903f72bcc15 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -39,7 +39,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #ifdef CONFIG_NF_CONNTRACK_LABELS struct net *net = nf_ct_net(ct); - if (net->ct.labels_used == 0) + if (atomic_read(&net->ct.labels_used) == 0) return NULL; return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d466e1a3b0b1..fe1507c1db82 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -53,6 +53,7 @@ struct nf_flowtable_type { struct list_head list; int family; int (*init)(struct nf_flowtable *ft); + bool (*gc)(const struct flow_offload *flow); int (*setup)(struct nf_flowtable *ft, struct net_device *dev, enum flow_block_command cmd); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index dd40c75011d2..3bbd13ab1ecf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -274,6 +274,9 @@ struct nft_userdata { unsigned char data[]; }; +/* placeholder structure for opaque set element backend representation. */ +struct nft_elem_priv { }; + /** * struct nft_set_elem - generic representation of set elements * @@ -294,9 +297,14 @@ struct nft_set_elem { u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; struct nft_data val; } data; - void *priv; + struct nft_elem_priv *priv; }; +static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) +{ + return (void *)priv; +} + struct nft_set; struct nft_set_iter { u8 genmask; @@ -306,7 +314,7 @@ struct nft_set_iter { int (*fn)(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); }; /** @@ -430,7 +438,8 @@ struct nft_set_ops { const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, + struct nft_elem_priv * + (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *), const struct nft_expr *expr, @@ -442,27 +451,27 @@ struct nft_set_ops { int (*insert)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext); + struct nft_elem_priv **priv); void (*activate)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); - void * (*deactivate)(const struct net *net, + struct nft_elem_priv *elem_priv); + struct nft_elem_priv * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); - bool (*flush)(const struct net *net, + void (*flush)(const struct net *net, const struct nft_set *set, - void *priv); + struct nft_elem_priv *priv); void (*remove)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); void (*walk)(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter); - void * (*get)(const struct net *net, + struct nft_elem_priv * (*get)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); - void (*commit)(const struct nft_set *set); + void (*commit)(struct nft_set *set); void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); @@ -796,9 +805,9 @@ static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, - void *elem) + const struct nft_elem_priv *elem_priv) { - return elem + set->ops->elemsize; + return (void *)elem_priv + set->ops->elemsize; } static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) @@ -810,16 +819,19 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr); -void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *key_end, const u32 *data, - u64 timeout, u64 expiration, gfp_t gfp); +struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, + const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp); int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); -void nft_set_elem_destroy(const struct nft_set *set, void *elem, +void nft_set_elem_destroy(const struct nft_set *set, + const struct nft_elem_priv *elem_priv, bool destroy_expr); void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem); + const struct nft_set *set, + const struct nft_elem_priv *elem_priv); struct nft_expr_ops; /** @@ -1061,7 +1073,7 @@ struct nft_chain { int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); @@ -1198,10 +1210,13 @@ static inline void nft_use_inc_restore(u32 *use) * @hgenerator: handle generator state * @handle: table handle * @use: number of chain references to this table + * @family:address family * @flags: table flag (see enum nft_table_flags) * @genmask: generation mask - * @afinfo: address family info + * @nlpid: netlink port ID * @name: name of the table + * @udlen: length of the user data + * @udata: user data * @validate_state: internal, set when transaction adds jumps */ struct nft_table { @@ -1635,14 +1650,14 @@ struct nft_trans_table { struct nft_trans_elem { struct nft_set *set; - struct nft_set_elem elem; + struct nft_elem_priv *elem_priv; bool bound; }; #define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem(trans) \ - (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_priv(trans) \ + (((struct nft_trans_elem *)trans->data)->elem_priv) #define nft_trans_elem_set_bound(trans) \ (((struct nft_trans_elem *)trans->data)->bound) @@ -1682,8 +1697,8 @@ struct nft_trans_gc { struct net *net; struct nft_set *set; u32 seq; - u8 count; - void *priv[NFT_TRANS_GC_BATCHCOUNT]; + u16 count; + struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT]; struct rcu_head rcu; }; @@ -1700,12 +1715,13 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); -struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq); +struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc); void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); diff --git a/include/net/netkit.h b/include/net/netkit.h new file mode 100644 index 000000000000..0ba2e6b847ca --- /dev/null +++ b/include/net/netkit.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Isovalent */ +#ifndef __NET_NETKIT_H +#define __NET_NETKIT_H + +#include <linux/bpf.h> + +#ifdef CONFIG_NETKIT +int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); +int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +#else +static inline int netkit_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int netkit_link_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int netkit_prog_detach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int netkit_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} +#endif /* CONFIG_NETKIT */ +#endif /* __NET_NETKIT_H */ diff --git a/include/net/netlink.h b/include/net/netlink.h index 8a7cd1170e1f..83bdf787aeee 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -128,6 +128,8 @@ * nla_len(nla) length of attribute payload * * Attribute Payload Access for Basic Types: + * nla_get_uint(nla) get payload for a uint attribute + * nla_get_sint(nla) get payload for a sint attribute * nla_get_u8(nla) get payload for a u8 attribute * nla_get_u16(nla) get payload for a u16 attribute * nla_get_u32(nla) get payload for a u32 attribute @@ -183,6 +185,8 @@ enum { NLA_REJECT, NLA_BE16, NLA_BE32, + NLA_SINT, + NLA_UINT, __NLA_TYPE_MAX, }; @@ -229,6 +233,7 @@ enum nla_policy_validation { * nested header (or empty); len field is used if * nested_policy is also used, for the max attr * number in the nested policy. + * NLA_SINT, NLA_UINT, * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, @@ -260,12 +265,14 @@ enum nla_policy_validation { * while an array has the nested attributes at another * level down and the attribute types directly in the * nesting don't matter. + * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, * NLA_U64, * NLA_BE16, * NLA_BE32, + * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, @@ -280,6 +287,7 @@ enum nla_policy_validation { * or NLA_POLICY_FULL_RANGE_SIGNED() macros instead. * Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and * NLA_POLICY_RANGE() macros. + * NLA_UINT, * NLA_U8, * NLA_U16, * NLA_U32, @@ -288,6 +296,7 @@ enum nla_policy_validation { * to a struct netlink_range_validation that indicates * the min/max values. * Use NLA_POLICY_FULL_RANGE(). + * NLA_SINT, * NLA_S8, * NLA_S16, * NLA_S32, @@ -351,8 +360,8 @@ struct nla_policy { const u32 mask; const char *reject_message; const struct nla_policy *nested_policy; - struct netlink_range_validation *range; - struct netlink_range_validation_signed *range_signed; + const struct netlink_range_validation *range; + const struct netlink_range_validation_signed *range_signed; struct { s16 min, max; }; @@ -377,9 +386,11 @@ struct nla_policy { #define __NLA_IS_UINT_TYPE(tp) \ (tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \ - tp == NLA_U64 || tp == NLA_BE16 || tp == NLA_BE32) + tp == NLA_U64 || tp == NLA_UINT || \ + tp == NLA_BE16 || tp == NLA_BE32) #define __NLA_IS_SINT_TYPE(tp) \ - (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) + (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64 || \ + tp == NLA_SINT) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_UINT_TYPE(tp) \ @@ -1358,6 +1369,22 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) } /** + * nla_put_uint - Add a variable-size unsigned int to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_uint(struct sk_buff *skb, int attrtype, u64 value) +{ + u64 tmp64 = value; + u32 tmp32 = value; + + if (tmp64 == tmp32) + return nla_put_u32(skb, attrtype, tmp32); + return nla_put(skb, attrtype, sizeof(u64), &tmp64); +} + +/** * nla_put_be32 - Add a __be32 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type @@ -1512,6 +1539,22 @@ static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, } /** + * nla_put_sint - Add a variable-size signed int to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @value: numeric value + */ +static inline int nla_put_sint(struct sk_buff *skb, int attrtype, s64 value) +{ + s64 tmp64 = value; + s32 tmp32 = value; + + if (tmp64 == tmp32) + return nla_put_s32(skb, attrtype, tmp32); + return nla_put(skb, attrtype, sizeof(s64), &tmp64); +} + +/** * nla_put_string - Add a string netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type @@ -1668,6 +1711,17 @@ static inline u64 nla_get_u64(const struct nlattr *nla) } /** + * nla_get_uint - return payload of uint attribute + * @nla: uint netlink attribute + */ +static inline u64 nla_get_uint(const struct nlattr *nla) +{ + if (nla_len(nla) == sizeof(u32)) + return nla_get_u32(nla); + return nla_get_u64(nla); +} + +/** * nla_get_be64 - return payload of __be64 attribute * @nla: __be64 netlink attribute */ @@ -1730,6 +1784,17 @@ static inline s64 nla_get_s64(const struct nlattr *nla) } /** + * nla_get_sint - return payload of uint attribute + * @nla: uint netlink attribute + */ +static inline s64 nla_get_sint(const struct nlattr *nla) +{ + if (nla_len(nla) == sizeof(s32)) + return nla_get_s32(nla); + return nla_get_s64(nla); +} + +/** * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 1f463b3957c7..bae914815aa3 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -107,7 +107,7 @@ struct netns_ct { struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) - unsigned int labels_used; + atomic_t labels_used; #endif }; #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7a41c4791536..73f43f699199 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,9 @@ struct netns_ipv4 { u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; + u8 sysctl_tcp_backlog_ack_defer; + u8 sysctl_tcp_pingpong_thresh; + int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bd7c3be4af5d..423b52eca908 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -50,6 +50,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; + unsigned int idx_generator; struct hlist_head policy_inexact[XFRM_POLICY_MAX]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 2b12725de9c0..d92046a4a078 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -92,7 +92,7 @@ struct nh_res_table { u32 unbalanced_timer; u16 num_nh_buckets; - struct nh_res_bucket nh_buckets[]; + struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); }; struct nh_grp_entry { @@ -126,7 +126,7 @@ struct nh_group { bool has_v4; struct nh_res_table __rcu *res_table; - struct nh_grp_entry nh_entries[]; + struct nh_grp_entry nh_entries[] __counted_by(num_nh); }; struct nexthop { @@ -187,7 +187,7 @@ struct nh_notifier_grp_entry_info { struct nh_notifier_grp_info { u16 num_nh; bool is_fdb; - struct nh_notifier_grp_entry_info nh_entries[]; + struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); }; struct nh_notifier_res_bucket_info { @@ -200,7 +200,7 @@ struct nh_notifier_res_bucket_info { struct nh_notifier_res_table_info { u16 num_nh_buckets; - struct nh_notifier_single_info nhs[]; + struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; struct nh_notifier_info { diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 94231533a369..4ebd544ae977 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -8,23 +8,46 @@ /** * DOC: page_pool allocator * - * The page_pool allocator is optimized for the XDP mode that - * uses one frame per-page, but it can fallback on the - * regular page allocator APIs. - * - * Basic use involves replacing alloc_pages() calls with the - * page_pool_alloc_pages() call. Drivers should use - * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). - * - * API keeps track of in-flight pages, in order to let API user know - * when it is safe to free a page_pool object. Thus, API users - * must call page_pool_put_page() to free the page, or attach - * the page to a page_pool-aware objects like skbs marked with + * The page_pool allocator is optimized for recycling page or page fragment used + * by skb packet and xdp frame. + * + * Basic use involves replacing and alloc_pages() calls with page_pool_alloc(), + * which allocate memory with or without page splitting depending on the + * requested memory size. + * + * If the driver knows that it always requires full pages or its allocations are + * always smaller than half a page, it can use one of the more specific API + * calls: + * + * 1. page_pool_alloc_pages(): allocate memory without page splitting when + * driver knows that the memory it need is always bigger than half of the page + * allocated from page pool. There is no cache line dirtying for 'struct page' + * when a page is recycled back to the page pool. + * + * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver + * knows that the memory it need is always smaller than or equal to half of the + * page allocated from page pool. Page splitting enables memory saving and thus + * avoids TLB/cache miss for data access, but there also is some cost to + * implement page splitting, mainly some cache line dirtying/bouncing for + * 'struct page' and atomic operation for page->pp_frag_count. + * + * The API keeps track of in-flight pages, in order to let API users know when + * it is safe to free a page_pool object, the API users must call + * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or + * attach the page_pool object to a page_pool-aware object like skbs marked with * skb_mark_for_recycle(). * - * API user must call page_pool_put_page() once on a page, as it - * will either recycle the page, or in case of refcnt > 1, it will - * release the DMA mapping and in-flight state accounting. + * page_pool_put_page() may be called multi times on the same page if a page is + * split into multi fragments. For the last fragment, it will either recycle the + * page, or in case of page->_refcount > 1, it will release the DMA mapping and + * in-flight state accounting. + * + * dma_sync_single_range_for_device() is only called for the last fragment when + * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the + * last freed fragment to do the sync_for_device operation for all fragments in + * the same page when a page is split, the API user must setup pool->p.max_len + * and pool->p.offset correctly and ensure that page_pool_put_page() is called + * with dma_sync_size being -1 for fragment API. */ #ifndef _NET_PAGE_POOL_HELPERS_H #define _NET_PAGE_POOL_HELPERS_H @@ -73,6 +96,17 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } +/** + * page_pool_dev_alloc_frag() - allocate a page fragment. + * @pool: pool from which to allocate + * @offset: offset to the allocated page + * @size: requested size + * + * Get a page fragment from the page allocator or page_pool caches. + * + * Return: + * Return allocated page fragment, otherwise return NULL. + */ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size) @@ -82,6 +116,91 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, return page_pool_alloc_frag(pool, offset, size, gfp); } +static inline struct page *page_pool_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) +{ + unsigned int max_size = PAGE_SIZE << pool->p.order; + struct page *page; + + if ((*size << 1) > max_size) { + *size = max_size; + *offset = 0; + return page_pool_alloc_pages(pool, gfp); + } + + page = page_pool_alloc_frag(pool, offset, *size, gfp); + if (unlikely(!page)) + return NULL; + + /* There is very likely not enough space for another fragment, so append + * the remaining size to the current fragment to avoid truesize + * underestimate problem. + */ + if (pool->frag_offset + *size > max_size) { + *size = max_size - *offset; + pool->frag_offset = max_size; + } + + return page; +} + +/** + * page_pool_dev_alloc() - allocate a page or a page fragment. + * @pool: pool from which to allocate + * @offset: offset to the allocated page + * @size: in as the requested size, out as the allocated size + * + * Get a page or a page fragment from the page allocator or page_pool caches + * depending on the requested size in order to allocate memory with least memory + * utilization and performance penalty. + * + * Return: + * Return allocated page or page fragment, otherwise return NULL. + */ +static inline struct page *page_pool_dev_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc(pool, offset, size, gfp); +} + +static inline void *page_pool_alloc_va(struct page_pool *pool, + unsigned int *size, gfp_t gfp) +{ + unsigned int offset; + struct page *page; + + /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */ + page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM); + if (unlikely(!page)) + return NULL; + + return page_address(page) + offset; +} + +/** + * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its + * va. + * @pool: pool from which to allocate + * @size: in as the requested size, out as the allocated size + * + * This is just a thin wrapper around the page_pool_alloc() API, and + * it returns va of the allocated page or page fragment. + * + * Return: + * Return the va for the allocated page or page fragment, otherwise return NULL. + */ +static inline void *page_pool_dev_alloc_va(struct page_pool *pool, + unsigned int *size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_va(pool, size, gfp); +} + /** * page_pool_get_dma_dir() - Retrieve the stored DMA direction. * @pool: pool from which page was allocated @@ -115,28 +234,49 @@ static inline long page_pool_defrag_page(struct page *page, long nr) long ret; /* If nr == pp_frag_count then we have cleared all remaining - * references to the page. No need to actually overwrite it, instead - * we can leave this to be overwritten by the calling function. + * references to the page: + * 1. 'n == 1': no need to actually overwrite it. + * 2. 'n != 1': overwrite it with one, which is the rare case + * for pp_frag_count draining. * - * The main advantage to doing this is that an atomic_read is - * generally a much cheaper operation than an atomic update, - * especially when dealing with a page that may be partitioned - * into only 2 or 3 pieces. + * The main advantage to doing this is that not only we avoid a atomic + * update, as an atomic_read is generally a much cheaper operation than + * an atomic update, especially when dealing with a page that may be + * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count + * handling by ensuring all pages have partitioned into only 1 piece + * initially, and only overwrite it when the page is partitioned into + * more than one piece. */ - if (atomic_long_read(&page->pp_frag_count) == nr) + if (atomic_long_read(&page->pp_frag_count) == nr) { + /* As we have ensured nr is always one for constant case using + * the BUILD_BUG_ON(), only need to handle the non-constant case + * here for pp_frag_count draining, which is a rare case. + */ + BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); + if (!__builtin_constant_p(nr)) + atomic_long_set(&page->pp_frag_count, 1); + return 0; + } ret = atomic_long_sub_return(nr, &page->pp_frag_count); WARN_ON(ret < 0); + + /* We are the last user here too, reset pp_frag_count back to 1 to + * ensure all pages have been partitioned into 1 piece initially, + * this should be the rare case when the last two fragment users call + * page_pool_defrag_page() currently. + */ + if (unlikely(!ret)) + atomic_long_set(&page->pp_frag_count, 1); + return ret; } -static inline bool page_pool_is_last_frag(struct page_pool *pool, - struct page *page) +static inline bool page_pool_is_last_frag(struct page *page) { - /* If fragments aren't enabled or count is 0 we were the last user */ - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || - (page_pool_defrag_page(page, 1) == 0); + /* If page_pool_defrag_page() returns 0, we were the last user */ + return page_pool_defrag_page(page, 1) == 0; } /** @@ -161,7 +301,7 @@ static inline void page_pool_put_page(struct page_pool *pool, * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_frag(pool, page)) + if (!page_pool_is_last_frag(page)) return; page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); @@ -197,10 +337,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ +#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) /** + * page_pool_free_va() - free a va into the page_pool + * @pool: pool from which va was allocated + * @va: va to be freed + * @allow_direct: freed by the consumer, allow lockless caching + * + * Free a va allocated from page_pool_allo_va(). + */ +static inline void page_pool_free_va(struct page_pool *pool, void *va, + bool allow_direct) +{ + page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct); +} + +/** * page_pool_get_dma_addr() - Retrieve the stored DMA address. * @page: page allocated from a page pool * @@ -211,17 +365,25 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { dma_addr_t ret = page->dma_addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; return ret; } -static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { + page->dma_addr = addr >> PAGE_SHIFT; + + /* We assume page alignment to shave off bottom bits, + * if this "compression" doesn't work we need to drop. + */ + return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT; + } + page->dma_addr = addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - page->dma_addr_upper = upper_32_bits(addr); + return false; } static inline bool page_pool_put(struct page_pool *pool) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 887e7946a597..6fc5134095ed 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -17,10 +17,8 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ - PP_FLAG_DMA_SYNC_DEV |\ - PP_FLAG_PAGE_FRAG) + PP_FLAG_DMA_SYNC_DEV) /* * Fast allocation side cache array/stack @@ -45,7 +43,7 @@ struct pp_alloc_cache { /** * struct page_pool_params - page pool parameters - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV * @order: 2^order pages on allocation * @pool_size: size of the ptr_ring * @nid: NUMA node id to allocate from pages from diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f308e8268651..a76c9171db0e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl) return xchg(clp, cl); } +static inline void tcf_set_drop_reason(struct tcf_result *res, + enum skb_drop_reason reason) +{ + res->drop_reason = reason; +} + static inline void __tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 15960564e0c3..9fa1d0794dfa 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -20,10 +20,10 @@ struct qdisc_walker { int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); }; -static inline void *qdisc_priv(struct Qdisc *q) -{ - return &q->privdata; -} +#define qdisc_priv(q) \ + _Generic(q, \ + const struct Qdisc * : (const void *)&q->privdata, \ + struct Qdisc * : (void *)&q->privdata) static inline struct Qdisc *qdisc_from_priv(void *priv) { diff --git a/include/net/regulatory.h b/include/net/regulatory.h index b2cb4a9eb04d..ebf9e028d1ef 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -213,6 +213,7 @@ struct ieee80211_reg_rule { u32 flags; u32 dfs_cac_ms; bool has_wmm; + s8 psd; }; struct ieee80211_regdomain { diff --git a/include/net/route.h b/include/net/route.h index 51a45b1887b5..980ab474eabd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -37,7 +37,7 @@ #define RTO_ONLINK 0x01 -#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) +#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) static inline __u8 ip_sock_rt_scope(const struct sock *sk) @@ -50,7 +50,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk) static inline __u8 ip_sock_rt_tos(const struct sock *sk) { - return RT_TOS(inet_sk(sk)->tos); + return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); } struct ip_tunnel_info; @@ -136,12 +136,6 @@ static inline struct rtable *__ip_route_output_key(struct net *net, struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); -struct rtable *ip_route_output_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); - struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f232512505f8..dcb9160e6467 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -324,7 +324,6 @@ struct Qdisc_ops { struct module *owner; }; - struct tcf_result { union { struct { @@ -332,8 +331,8 @@ struct tcf_result { u32 classid; }; const struct tcf_proto *goto_tp; - }; + enum skb_drop_reason drop_reason; }; struct tcf_chain; @@ -587,6 +586,7 @@ static inline void sch_tree_unlock(struct Qdisc *q) extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; +extern const u8 sch_default_prio2band[TC_PRIO_MAX + 1]; extern struct Qdisc_ops mq_qdisc_ops; extern struct Qdisc_ops noqueue_qdisc_ops; extern const struct Qdisc_ops *default_qdisc_ops; diff --git a/include/net/sock.h b/include/net/sock.h index b770261fbdaf..1d6931caf0c3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -336,7 +336,7 @@ struct sk_filter; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start - * @sk_wait_pending: number of threads blocked on this socket + * @sk_disconnects: number of disconnect operations performed on this sock * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -429,7 +429,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; - int sk_wait_pending; + int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -1189,8 +1189,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ - ({ int __rc; \ - __sk->sk_wait_pending++; \ + ({ int __rc, __dis = __sk->sk_disconnects; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1200,8 +1199,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ - __sk->sk_wait_pending--; \ - __rc = __condition; \ + __rc = __dis == __sk->sk_disconnects ? __condition : -EPIPE; \ __rc; \ }) @@ -1823,12 +1821,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) static inline void sock_release_ownership(struct sock *sk) { - if (sock_owned_by_user_nocheck(sk)) { - sk->sk_lock.owned = 0; + DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk)); + sk->sk_lock.owned = 0; - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } /* no reclassification while locks are held */ @@ -1867,11 +1864,13 @@ int sk_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); +int do_sock_setsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, int optlen); +int do_sock_getsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, sockptr_t optlen); int sk_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); -int sock_getsockopt(struct socket *sock, int level, int op, - char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, @@ -2008,21 +2007,33 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) /* sk_tx_queue_mapping accept only upto a 16-bit value */ if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX)) return; - sk->sk_tx_queue_mapping = tx_queue; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); } #define NO_QUEUE_MAPPING USHRT_MAX static inline void sk_tx_queue_clear(struct sock *sk) { - sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING); } static inline int sk_tx_queue_get(const struct sock *sk) { - if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_tx_queue_mapping; + if (sk) { + /* Paired with WRITE_ONCE() in sk_tx_queue_clear() + * and sk_tx_queue_set(). + */ + int val = READ_ONCE(sk->sk_tx_queue_mapping); + if (val != NO_QUEUE_MAPPING) + return val; + } return -1; } @@ -2142,14 +2153,14 @@ static inline bool sk_rethink_txhash(struct sock *sk) } static inline struct dst_entry * -__sk_dst_get(struct sock *sk) +__sk_dst_get(const struct sock *sk) { return rcu_dereference_check(sk->sk_dst_cache, lockdep_sock_is_held(sk)); } static inline struct dst_entry * -sk_dst_get(struct sock *sk) +sk_dst_get(const struct sock *sk) { struct dst_entry *dst; @@ -2171,7 +2182,7 @@ static inline void __dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } } @@ -2188,7 +2199,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = rcu_dereference_protected(sk->sk_dst_cache, lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); @@ -2201,7 +2212,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -2239,7 +2250,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) } } -bool sk_mc_loop(struct sock *sk); +bool sk_mc_loop(const struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) { diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index b24ea2d9400b..8a6dbfb23336 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -22,6 +22,7 @@ struct tcf_ct_params { struct nf_nat_range2 range; bool ipv4_range; + bool put_labels; u16 ct_action; diff --git a/include/net/tcp.h b/include/net/tcp.h index 91688d0dadcd..d2f0736b76b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -37,6 +37,7 @@ #include <net/snmp.h> #include <net/ip.h> #include <net/tcp_states.h> +#include <net/tcp_ao.h> #include <net/inet_ecn.h> #include <net/dst.h> #include <net/mptcp.h> @@ -131,6 +132,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); + #if HZ >= 100 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ #define TCP_ATO_MIN ((unsigned)(HZ/25)) @@ -141,6 +144,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ + +#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */ + #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@ -161,7 +167,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_KEEPCNT 127 #define MAX_TCP_SYNCNT 127 -#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24) +/* Ensure that TCP PAWS checks are relaxed after ~2147 seconds + * to avoid overflows. This assumes a clock smaller than 1 Mhz. + * Default clock is 1 Khz, tcp_usec_ts uses 1 Mhz. + */ +#define TCP_PAWS_WRAP (INT_MAX / USEC_PER_SEC) + #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated * after this time. It should be equal * (or greater than) TCP_TIMEWAIT_LEN @@ -184,6 +195,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_AO 29 /* Authentication Option (RFC5925) */ #define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ @@ -348,12 +360,14 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule); -static inline void tcp_dec_quickack_mode(struct sock *sk, - const unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ack.quick) { + /* How many ACKs S/ACKing new data have we sent? */ + const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0; + if (pkts >= icsk->icsk_ack.quick) { icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ @@ -424,7 +438,6 @@ int tcp_mmap(struct file *file, struct socket *sock, void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); -const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* * BPF SKB-less helpers @@ -718,8 +731,10 @@ static inline void tcp_fast_path_check(struct sock *sk) tcp_fast_path_on(tp); } +u32 tcp_delack_max(const struct sock *sk); + /* Compute the actual rto_min value */ -static inline u32 tcp_rto_min(struct sock *sk) +static inline u32 tcp_rto_min(const struct sock *sk) { const struct dst_entry *dst = __sk_dst_get(sk); u32 rto_min = inet_csk(sk)->icsk_rto_min; @@ -729,7 +744,7 @@ static inline u32 tcp_rto_min(struct sock *sk) return rto_min; } -static inline u32 tcp_rto_min_us(struct sock *sk) +static inline u32 tcp_rto_min_us(const struct sock *sk) { return jiffies_to_usecs(tcp_rto_min(sk)); } @@ -789,22 +804,31 @@ static inline u64 tcp_clock_us(void) return div_u64(tcp_clock_ns(), NSEC_PER_USEC); } -/* This should only be used in contexts where tp->tcp_mstamp is up to date */ -static inline u32 tcp_time_stamp(const struct tcp_sock *tp) +static inline u64 tcp_clock_ms(void) { - return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); + return div_u64(tcp_clock_ns(), NSEC_PER_MSEC); } -/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */ -static inline u32 tcp_ns_to_ts(u64 ns) +/* TCP Timestamp included in TS option (RFC 1323) can either use ms + * or usec resolution. Each socket carries a flag to select one or other + * resolution, as the route attribute could change anytime. + * Each flow must stick to initial resolution. + */ +static inline u32 tcp_clock_ts(bool usec_ts) { - return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); + return usec_ts ? tcp_clock_us() : tcp_clock_ms(); } -/* Could use tcp_clock_us() / 1000, but this version uses a single divide */ -static inline u32 tcp_time_stamp_raw(void) +static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp) { - return tcp_ns_to_ts(tcp_clock_ns()); + return div_u64(tp->tcp_mstamp, USEC_PER_MSEC); +} + +static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp) +{ + if (tp->tcp_usec_ts) + return tp->tcp_mstamp; + return tcp_time_stamp_ms(tp); } void tcp_mstamp_refresh(struct tcp_sock *tp); @@ -814,17 +838,30 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } -static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) -{ - return tcp_ns_to_ts(skb->skb_mstamp_ns); -} - /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } +/* Provide skb TSval in usec or ms unit */ +static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb) +{ + if (usec_ts) + return tcp_skb_timestamp_us(skb); + + return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC); +} + +static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw) +{ + return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset; +} + +static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) +{ + return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off; +} #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) @@ -1453,13 +1490,15 @@ static inline int tcp_space_from_win(const struct sock *sk, int win) return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win); } +/* Assume a conservative default of 1200 bytes of payload per 4K page. + * This may be adjusted later in tcp_measure_rcv_mss(). + */ +#define TCP_DEFAULT_SCALING_RATIO ((1200 << TCP_RMEM_TO_WIN_SCALE) / \ + SKB_TRUESIZE(4096)) + static inline void tcp_scaling_ratio_init(struct sock *sk) { - /* Assume a conservative default of 1200 bytes of payload per 4K page. - * This may be adjusted later in tcp_measure_rcv_mss(). - */ - tcp_sk(sk)->scaling_ratio = (1200 << TCP_RMEM_TO_WIN_SCALE) / - SKB_TRUESIZE(4096); + tcp_sk(sk)->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; } /* Note: caller must be prepared to deal with negative returns */ @@ -1590,7 +1629,7 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt, if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) return true; if (unlikely(!time_before32(ktime_get_seconds(), - rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))) + rx_opt->ts_recent_stamp + TCP_PAWS_WRAP))) return true; /* * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, @@ -1650,12 +1689,7 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) tp->retransmit_skb_hint = NULL; } -union tcp_md5_addr { - struct in_addr a4; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr a6; -#endif -}; +#define tcp_md5_addr tcp_ao_addr /* - key database */ struct tcp_md5sig_key { @@ -1699,12 +1733,39 @@ union tcp_md5sum_block { #endif }; -/* - pool: digest algorithm, hash description and scratch buffer */ -struct tcp_md5sig_pool { - struct ahash_request *md5_req; - void *scratch; +/* + * struct tcp_sigpool - per-CPU pool of ahash_requests + * @scratch: per-CPU temporary area, that can be used between + * tcp_sigpool_start() and tcp_sigpool_end() to perform + * crypto request + * @req: pre-allocated ahash request + */ +struct tcp_sigpool { + void *scratch; + struct ahash_request *req; }; +int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size); +void tcp_sigpool_get(unsigned int id); +void tcp_sigpool_release(unsigned int id); +int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, + const struct sk_buff *skb, + unsigned int header_len); + +/** + * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + * @c: returned tcp_sigpool for usage (uninitialized on failure) + * + * Returns 0 on success, error otherwise. + */ +int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); +/** + * tcp_sigpool_end - enable bh and stop using tcp_sigpool + * @c: tcp_sigpool context that was returned by tcp_sigpool_start() + */ +void tcp_sigpool_end(struct tcp_sigpool *c); +size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len); /* - functions */ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); @@ -1717,6 +1778,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags); +void tcp_clear_md5_list(struct sock *sk); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); @@ -1725,20 +1787,29 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, extern struct static_key_false_deferred tcp_md5_needed; struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, - int family); + int family, bool any_l3index); static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; - return __tcp_md5_do_lookup(sk, l3index, addr, family); + return __tcp_md5_do_lookup(sk, l3index, addr, family, false); +} + +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return NULL; + return __tcp_md5_do_lookup(sk, 0, addr, family, true); } enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif); + int family, int l3index, const __u8 *hash_location); #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) @@ -1750,27 +1821,29 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return NULL; } +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + return NULL; +} + static inline enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif) + int family, int l3index, const __u8 *hash_location) { return SKB_NOT_DROPPED_YET; } #define tcp_twsk_md5_key(twsk) NULL #endif -bool tcp_alloc_md5sig_pool(void); - -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); -static inline void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); -} +int tcp_md5_alloc_sigpool(void); +void tcp_md5_release_sigpool(void); +void tcp_md5_add_sigpool(void); +extern int tcp_md5_sigpool_id; -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, - unsigned int header_len); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, +int tcp_md5_hash_key(struct tcp_sigpool *hp, const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ @@ -2075,7 +2148,11 @@ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)) INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); +#ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); +#else +static inline void tcp_gro_complete(struct sk_buff *skb) { } +#endif void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); @@ -2115,6 +2192,18 @@ struct tcp_sock_af_ops { sockptr_t optval, int optlen); #endif +#ifdef CONFIG_TCP_AO + int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen); + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid); + int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); + int (*calc_ao_hash)(char *location, struct tcp_ao_key *ao, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +#endif }; struct tcp_request_sock_ops { @@ -2127,6 +2216,15 @@ struct tcp_request_sock_ops { const struct sock *sk, const struct sk_buff *skb); #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); + int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock *sk); + int (*ao_synack_hash)(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +#endif #ifdef CONFIG_SYN_COOKIES __u32 (*cookie_init_seq)(const struct sk_buff *skb, __u16 *mss); @@ -2167,6 +2265,76 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, } #endif +struct tcp_key { + union { + struct { + struct tcp_ao_key *ao_key; + char *traffic_key; + u32 sne; + u8 rcv_next; + }; + struct tcp_md5sig_key *md5_key; + }; + enum { + TCP_KEY_NONE = 0, + TCP_KEY_MD5, + TCP_KEY_AO, + } type; +}; + +static inline void tcp_get_current_key(const struct sock *sk, + struct tcp_key *out) +{ +#if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG) + const struct tcp_sock *tp = tcp_sk(sk); +#endif + +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key)) { + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao) { + out->ao_key = READ_ONCE(ao->current_key); + out->type = TCP_KEY_AO; + return; + } + } +#endif +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key) && + rcu_access_pointer(tp->md5sig_info)) { + out->md5_key = tp->af_specific->md5_lookup(sk, sk); + if (out->md5_key) { + out->type = TCP_KEY_MD5; + return; + } + } +#endif + out->type = TCP_KEY_NONE; +} + +static inline bool tcp_key_is_md5(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key) && + key->type == TCP_KEY_MD5) + return true; +#endif + return false; +} + +static inline bool tcp_key_is_ao(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + key->type == TCP_KEY_AO) + return true; +#endif + return false; +} + int tcpv4_offload_init(void); void tcp_v4_init(void); @@ -2525,4 +2693,116 @@ static inline u64 tcp_transmit_time(const struct sock *sk) return 0; } +static inline int tcp_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const struct tcp_ao_hdr **aoh) +{ + const u8 *md5_tmp, *ao_tmp; + int ret; + + ret = tcp_do_parse_auth_options(th, &md5_tmp, &ao_tmp); + if (ret) + return ret; + + if (md5_hash) + *md5_hash = md5_tmp; + + if (aoh) { + if (!ao_tmp) + *aoh = NULL; + else + *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2); + } + + return 0; +} + +static inline bool tcp_ao_required(struct sock *sk, const void *saddr, + int family, int l3index, bool stat_inc) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + struct tcp_ao_key *ao_key; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return false; + + ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao_info) + return false; + + ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1); + if (ao_info->ao_required || ao_key) { + if (stat_inc) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED); + atomic64_inc(&ao_info->counters.ao_required); + } + return true; + } +#endif + return false; +} + +/* Called with rcu_read_lock() */ +static inline enum skb_drop_reason +tcp_inbound_hash(struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + const __u8 *md5_location; + int l3index; + + /* Invalid option or two times meet any of auth options */ + if (tcp_parse_auth_options(th, &md5_location, &aoh)) { + tcp_hash_fail("TCP segment has incorrect auth options set", + family, skb, ""); + return SKB_DROP_REASON_TCP_AUTH_HDR; + } + + if (req) { + if (tcp_rsk_used_ao(req) != !!aoh) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + tcp_hash_fail("TCP connection can't start/end using TCP-AO", + family, skb, "%s", + !aoh ? "missing AO" : "AO signed"); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + } + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + /* Fast path: unsigned segments */ + if (likely(!md5_location && !aoh)) { + /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid + * for the remote peer. On TCP-AO established connection + * the last key is impossible to remove, so there's + * always at least one current_key. + */ + if (tcp_ao_required(sk, saddr, family, l3index, true)) { + tcp_hash_fail("AO hash is required, but not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_AONOTFOUND; + } + if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + tcp_hash_fail("MD5 Hash not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_MD5NOTFOUND; + } + return SKB_NOT_DROPPED_YET; + } + + if (aoh) + return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); + + return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, + l3index, md5_location); +} + #endif /* _TCP_H */ diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h new file mode 100644 index 000000000000..b56be10838f0 --- /dev/null +++ b/include/net/tcp_ao.h @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _TCP_AO_H +#define _TCP_AO_H + +#define TCP_AO_KEY_ALIGN 1 +#define __tcp_ao_key_align __aligned(TCP_AO_KEY_ALIGN) + +union tcp_ao_addr { + struct in_addr a4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr a6; +#endif +}; + +struct tcp_ao_hdr { + u8 kind; + u8 length; + u8 keyid; + u8 rnext_keyid; +}; + +struct tcp_ao_counters { + atomic64_t pkt_good; + atomic64_t pkt_bad; + atomic64_t key_not_found; + atomic64_t ao_required; + atomic64_t dropped_icmp; +}; + +struct tcp_ao_key { + struct hlist_node node; + union tcp_ao_addr addr; + u8 key[TCP_AO_MAXKEYLEN] __tcp_ao_key_align; + unsigned int tcp_sigpool_id; + unsigned int digest_size; + int l3index; + u8 prefixlen; + u8 family; + u8 keylen; + u8 keyflags; + u8 sndid; + u8 rcvid; + u8 maclen; + struct rcu_head rcu; + atomic64_t pkt_good; + atomic64_t pkt_bad; + u8 traffic_keys[]; +}; + +static inline u8 *rcv_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys; +} + +static inline u8 *snd_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys + key->digest_size; +} + +static inline int tcp_ao_maclen(const struct tcp_ao_key *key) +{ + return key->maclen; +} + +static inline int tcp_ao_len(const struct tcp_ao_key *key) +{ + return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr); +} + +static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key) +{ + return key->digest_size; +} + +static inline int tcp_ao_sizeof_key(const struct tcp_ao_key *key) +{ + return sizeof(struct tcp_ao_key) + (key->digest_size << 1); +} + +struct tcp_ao_info { + /* List of tcp_ao_key's */ + struct hlist_head head; + /* current_key and rnext_key aren't maintained on listen sockets. + * Their purpose is to cache keys on established connections, + * saving needless lookups. Never dereference any of them from + * listen sockets. + * ::current_key may change in RX to the key that was requested by + * the peer, please use READ_ONCE()/WRITE_ONCE() in order to avoid + * load/store tearing. + * Do the same for ::rnext_key, if you don't hold socket lock + * (it's changed only by userspace request in setsockopt()). + */ + struct tcp_ao_key *current_key; + struct tcp_ao_key *rnext_key; + struct tcp_ao_counters counters; + u32 ao_required :1, + accept_icmps :1, + __unused :30; + __be32 lisn; + __be32 risn; + /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ, + * that protect TCP-AO connection from replayed old TCP segments. + * See RFC5925 (6.2). + * In order to get correct SNE, there's a helper tcp_ao_compute_sne(). + * It needs SEQ basis to understand whereabouts are lower SEQ numbers. + * According to that basis vector, it can provide incremented SNE + * when SEQ rolls over or provide decremented SNE when there's + * a retransmitted segment from before-rolling over. + * - for request sockets such basis is rcv_isn/snt_isn, which seems + * good enough as it's unexpected to receive 4 Gbytes on reqsk. + * - for full sockets the basis is rcv_nxt/snd_una. snd_una is + * taken instead of snd_nxt as currently it's easier to track + * in tcp_snd_una_update(), rather than updating SNE in all + * WRITE_ONCE(tp->snd_nxt, ...) + * - for time-wait sockets the basis is tw_rcv_nxt/tw_snd_nxt. + * tw_snd_nxt is not expected to change, while tw_rcv_nxt may. + */ + u32 snd_sne; + u32 rcv_sne; + refcount_t refcnt; /* Protects twsk destruction */ + struct rcu_head rcu; +}; + +#define tcp_hash_fail(msg, family, skb, fmt, ...) \ +do { \ + const struct tcphdr *th = tcp_hdr(skb); \ + char hdr_flags[6]; \ + char *f = hdr_flags; \ + \ + if (th->fin) \ + *f++ = 'F'; \ + if (th->syn) \ + *f++ = 'S'; \ + if (th->rst) \ + *f++ = 'R'; \ + if (th->psh) \ + *f++ = 'P'; \ + if (th->ack) \ + *f++ = '.'; \ + *f = 0; \ + if ((family) == AF_INET) { \ + net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ + msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ + &ip_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } else { \ + net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ + msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ + &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } \ +} while (0) + +#ifdef CONFIG_TCP_AO +/* TCP-AO structures and functions */ +#include <linux/jump_label.h> +extern struct static_key_false_deferred tcp_ao_needed; + +struct tcp4_ao_context { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp6_ao_context { + struct in6_addr saddr; + struct in6_addr daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp_sigpool; +#define TCP_AO_ESTABLISHED (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | \ + TCPF_CLOSE | TCPF_CLOSE_WAIT | \ + TCPF_LAST_ACK | TCPF_CLOSING) + +int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location); +int tcp_ao_hash_skb(unsigned short int family, + char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, + sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid); +int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, + struct request_sock *req, struct sk_buff *skb, + int family); +int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, + unsigned int len, struct tcp_sigpool *hp); +void tcp_ao_destroy_sock(struct sock *sk, bool twsk); +void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); +bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code); +int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen); +enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, + const struct sk_buff *skb, unsigned short int family, + const struct request_sock *req, int l3index, + const struct tcp_ao_hdr *aoh); +u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq); +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, + const union tcp_ao_addr *addr, + int family, int sndid, int rcvid); +int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne); +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, u32 seq, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne); + +/* ipv4 specific functions */ +int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, + int sndid, int rcvid); +int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); +int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); +int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +/* ipv6 specific functions */ +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes); +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, __be32 sisn, __be32 disn); +int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send); +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, int sndid, int rcvid); +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +void tcp_ao_established(struct sock *sk); +void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); +void tcp_ao_connect_init(struct sock *sk); +void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family, int l3index); +#else /* CONFIG_TCP_AO */ + +static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location) +{ + return 0; +} + +static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family, int l3index) +{ +} + +static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int family, + int type, int code) +{ + return false; +} + +static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, + const struct sk_buff *skb, unsigned short int family, + const struct request_sock *req, int l3index, + const struct tcp_ao_hdr *aoh) +{ + return SKB_NOT_DROPPED_YET; +} + +static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, + int l3index, const union tcp_ao_addr *addr, + int family, int sndid, int rcvid) +{ + return NULL; +} + +static inline void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ +} + +static inline void tcp_ao_established(struct sock *sk) +{ +} + +static inline void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) +{ +} + +static inline void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, + struct tcp_sock *tp) +{ +} + +static inline void tcp_ao_connect_init(struct sock *sk) +{ +} + +static inline int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_get_repair(struct sock *sk, + sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_set_repair(struct sock *sk, + sockptr_t optval, unsigned int optlen) +{ + return -ENOPROTOOPT; +} +#endif + +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash); +#else +static inline int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) +{ + *md5_hash = NULL; + *ao_hash = NULL; + return 0; +} +#endif + +#endif /* _TCP_AO_H */ diff --git a/include/net/tcx.h b/include/net/tcx.h index 264f147953ba..04be9377785d 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -38,16 +38,11 @@ static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry) return container_of(bundle, struct tcx_entry, bundle); } -static inline struct tcx_link *tcx_link(struct bpf_link *link) +static inline struct tcx_link *tcx_link(const struct bpf_link *link) { return container_of(link, struct tcx_link, link); } -static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link) -{ - return tcx_link((struct bpf_link *)link); -} - void tcx_inc(void); void tcx_dec(void); diff --git a/include/net/tls.h b/include/net/tls.h index a2b44578dcb7..962f0c501111 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -61,7 +61,8 @@ struct tls_rec; #define TLS_AAD_SPACE_SIZE 13 -#define MAX_IV_SIZE 16 +#define TLS_MAX_IV_SIZE 16 +#define TLS_MAX_SALT_SIZE 4 #define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 #define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE @@ -149,6 +150,7 @@ struct tls_record_info { skb_frag_t frags[MAX_SKB_FRAGS]; }; +#define TLS_DRIVER_STATE_SIZE_TX 16 struct tls_offload_context_tx { struct crypto_aead *aead_send; spinlock_t lock; /* protects records list */ @@ -162,17 +164,13 @@ struct tls_offload_context_tx { void (*sk_destruct)(struct sock *sk); struct work_struct destruct_work; struct tls_context *ctx; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_TX 16 + u8 driver_state[TLS_DRIVER_STATE_SIZE_TX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_TX \ - (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) - enum tls_context_flags { /* tls_device_down was called after the netdev went down, device state * was released, and kTLS works in software, even though rx_conf is @@ -193,8 +191,8 @@ enum tls_context_flags { }; struct cipher_context { - char *iv; - char *rec_seq; + char iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE]; + char rec_seq[TLS_MAX_REC_SEQ_SIZE]; }; union tls_crypto_context { @@ -302,6 +300,7 @@ struct tls_offload_resync_async { u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; +#define TLS_DRIVER_STATE_SIZE_RX 8 struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; @@ -325,17 +324,13 @@ struct tls_offload_context_rx { struct tls_offload_resync_async *resync_async; }; }; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_RX 8 + u8 driver_state[TLS_DRIVER_STATE_SIZE_RX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_RX \ - (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) - struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0ca9b7a11baf..d716214fe03d 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -154,13 +154,30 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck); void udp_tunnel_sock_release(struct socket *sock); +struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, int oif, + __be32 *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 tos, + struct dst_cache *dst_cache); +struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, int oif, + struct in6_addr *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 dsfield, + struct dst_cache *dst_cache); + struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); @@ -174,16 +191,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif -static inline void udp_tunnel_encap_enable(struct socket *sock) +static inline void udp_tunnel_encap_enable(struct sock *sk) { - struct udp_sock *up = udp_sk(sock->sk); - - if (up->encap_enabled) + if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; - up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) - if (sock->sk->sk_family == PF_INET6) + if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); diff --git a/include/net/udplite.h b/include/net/udplite.h index bd33ff2b8f42..786919d29f8d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); + const struct sock *sk = skb->sk; int len = skb->len - off; - if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { - if (0 < up->pcslen) - len = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); + if (udp_test_bit(UDPLITE_SEND_CC, sk)) { + u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); + + if (pcslen < len) { + if (pcslen > 0) + len = pcslen; + udp_hdr(skb)->len = htons(pcslen); + } } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ diff --git a/include/net/xdp.h b/include/net/xdp.h index de08c8e0d134..349c36fb5fd8 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -383,14 +383,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE +/* Define the relationship between xdp-rx-metadata kfunc and + * various other entities: + * - xdp_rx_metadata enum + * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml) + * - kfunc name + * - xdp_metadata_ops field + */ #define XDP_METADATA_KFUNC_xxx \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \ - bpf_xdp_metadata_rx_timestamp) \ + NETDEV_XDP_RX_METADATA_TIMESTAMP, \ + bpf_xdp_metadata_rx_timestamp, \ + xmo_rx_timestamp) \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \ - bpf_xdp_metadata_rx_hash) \ + NETDEV_XDP_RX_METADATA_HASH, \ + bpf_xdp_metadata_rx_hash, \ + xmo_rx_hash) \ -enum { -#define XDP_METADATA_KFUNC(name, _) name, +enum xdp_rx_metadata { +#define XDP_METADATA_KFUNC(name, _, __, ___) name, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC MAX_XDP_METADATA_KFUNC, diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1617af380162..f83128007fb0 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -14,6 +14,8 @@ #include <linux/mm.h> #include <net/sock.h> +#define XDP_UMEM_SG_FLAG (1 << 1) + struct net_device; struct xsk_queue; struct xdp_buff; @@ -61,6 +63,13 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head tx_list; + /* record the number of tx descriptors sent by this xsk and + * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs + * to be given to other xsks for sending tx descriptors, thereby + * preventing other XSKs from being starved. + */ + u32 tx_budget_spent; + /* Protects generic receive. */ spinlock_t rx_lock; @@ -107,4 +116,13 @@ static inline void __xsk_map_flush(void) #endif /* CONFIG_XDP_SOCKETS */ +#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) +bool xsk_map_check_flush(void); +#else +static inline bool xsk_map_check_flush(void) +{ + return false; +} +#endif + #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 363c7d510554..c9bb0f892f55 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1207,20 +1207,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1); } -int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse); -static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { - return __xfrm_decode_session(skb, fl, family, 0); + return __xfrm_decode_session(net, skb, fl, family, 0); } -static inline int xfrm_decode_session_reverse(struct sk_buff *skb, +static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { - return __xfrm_decode_session(skb, fl, family, 1); + return __xfrm_decode_session(net, skb, fl, family, 1); } int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); @@ -1296,7 +1296,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { return 1; } -static inline int xfrm_decode_session_reverse(struct sk_buff *skb, +static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family) { @@ -1669,7 +1669,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); #endif void xfrm_local_error(struct sk_buff *skb, int mtu); -int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); @@ -1689,7 +1688,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -1712,6 +1710,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen); #else @@ -2166,7 +2168,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; + return inet6_test_bit(DONTFRAG, sk); return false; } |