diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2023-11-07 02:42:08 +0300 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2023-11-07 02:42:08 +0300 |
| commit | cdd5b5a9761fd66d17586e4f4ba6588c70e640ea (patch) | |
| tree | aba8409818be01f6af8683bf76594c790942d0bc /include/net | |
| parent | 5c15c60e7be615f05a45cd905093a54b11f461bc (diff) | |
| parent | 28d3fe32354701decc3e76d89712569c269b5e4f (diff) | |
| download | linux-cdd5b5a9761fd66d17586e4f4ba6588c70e640ea.tar.xz | |
Merge branch 'next' into for-linus
Prepare input updates for 6.7 merge window.
Diffstat (limited to 'include/net')
52 files changed, 992 insertions, 417 deletions
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 1b4230cd42a3..af729859385e 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -185,7 +185,7 @@ struct bt_iso_ucast_qos { struct bt_iso_bcast_qos { __u8 big; __u8 bis; - __u8 sync_interval; + __u8 sync_factor; __u8 packing; __u8 framing; struct bt_iso_io_qos in; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9654567cfae3..e01d52cb668c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -593,9 +593,7 @@ struct hci_dev { const char *fw_info; struct dentry *debugfs; -#ifdef CONFIG_DEV_COREDUMP struct hci_devcoredump dump; -#endif struct device dev; @@ -822,6 +820,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; + /* Accessed without hdev->lock: */ hci_conn_flags_t flags; u8 privacy_mode; }; @@ -1573,7 +1572,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_clear_disabled(struct hci_dev *hdev); +void hci_conn_params_free(struct hci_conn_params *param); +void hci_pend_le_list_del_init(struct hci_conn_params *param); +void hci_pend_le_list_add(struct hci_conn_params *param, + struct list_head *list); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index a5801649f619..5e68b3dd4422 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -979,6 +979,7 @@ struct mgmt_ev_auth_failed { #define MGMT_DEV_FOUND_NOT_CONNECTABLE BIT(2) #define MGMT_DEV_FOUND_INITIATED_CONN BIT(3) #define MGMT_DEV_FOUND_NAME_REQUEST_FAILED BIT(4) +#define MGMT_DEV_FOUND_SCAN_RSP BIT(5) #define MGMT_EV_DEVICE_FOUND 0x0012 struct mgmt_ev_device_found { diff --git a/include/net/bonding.h b/include/net/bonding.h index 59955ac33157..5b8b1b644a2d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-1.0+ */ /* * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. * @@ -7,9 +8,6 @@ * BUT, I'm the one who modified it for ethernet, so: * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov * - * This software may be used and distributed according to the terms - * of the GNU Public License, incorporated herein by reference. - * */ #ifndef _NET_BONDING_H @@ -279,7 +277,7 @@ struct bond_vlan_tag { unsigned short vlan_id; }; -/** +/* * Returns NULL if the net_device does not belong to any of the bond's slaves * * Caller must hold bond lock for read @@ -724,23 +722,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, } /* Caller must hold rcu_read_lock() for read */ -static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) +static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac) { struct list_head *iter; struct slave *tmp; - struct netdev_hw_addr *ha; bond_for_each_slave_rcu(bond, tmp, iter) if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) return true; - - if (netdev_uc_empty(bond->dev)) - return false; - - netdev_for_each_uc_addr(ha, bond->dev) - if (ether_addr_equal_64bits(mac, ha->addr)) - return true; - return false; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9e04f69712b1..d6fa7c8767ad 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7,7 +7,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2021, 2023 Intel Corporation */ #include <linux/ethtool.h> @@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, if (WARN_ON(iftype >= NL80211_IFTYPE_MAX)) return NULL; + if (iftype == NL80211_IFTYPE_AP_VLAN) + iftype = NL80211_IFTYPE_AP; + for (i = 0; i < sband->n_iftype_data; i++) { const struct ieee80211_sband_iftype_data *data = &sband->iftype_data[i]; @@ -1702,6 +1705,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS * @RATE_INFO_FLAGS_EHT_MCS: EHT MCS information + * @RATE_INFO_FLAGS_S1G_MCS: MCS field filled with S1G MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1712,6 +1716,7 @@ enum rate_info_flags { RATE_INFO_FLAGS_EDMG = BIT(5), RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), RATE_INFO_FLAGS_EHT_MCS = BIT(7), + RATE_INFO_FLAGS_S1G_MCS = BIT(8), }; /** @@ -1728,6 +1733,11 @@ enum rate_info_flags { * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation * @RATE_INFO_BW_320: 320 MHz bandwidth * @RATE_INFO_BW_EHT_RU: bandwidth determined by EHT RU allocation + * @RATE_INFO_BW_1: 1 MHz bandwidth + * @RATE_INFO_BW_2: 2 MHz bandwidth + * @RATE_INFO_BW_4: 4 MHz bandwidth + * @RATE_INFO_BW_8: 8 MHz bandwidth + * @RATE_INFO_BW_16: 16 MHz bandwidth */ enum rate_info_bw { RATE_INFO_BW_20 = 0, @@ -1739,6 +1749,11 @@ enum rate_info_bw { RATE_INFO_BW_HE_RU, RATE_INFO_BW_320, RATE_INFO_BW_EHT_RU, + RATE_INFO_BW_1, + RATE_INFO_BW_2, + RATE_INFO_BW_4, + RATE_INFO_BW_8, + RATE_INFO_BW_16, }; /** @@ -1747,8 +1762,8 @@ enum rate_info_bw { * Information about a receiving or transmitting bitrate * * @flags: bitflag of flags from &enum rate_info_flags - * @mcs: mcs index if struct describes an HT/VHT/HE rate * @legacy: bitrate in 100kbit/s for 802.11abg + * @mcs: mcs index if struct describes an HT/VHT/HE/EHT/S1G rate * @nss: number of streams (VHT & HE only) * @bw: bandwidth (from &enum rate_info_bw) * @he_gi: HE guard interval (from &enum nl80211_he_gi) @@ -1761,9 +1776,9 @@ enum rate_info_bw { * only valid if bw is %RATE_INFO_BW_EHT_RU) */ struct rate_info { - u8 flags; - u8 mcs; + u16 flags; u16 legacy; + u8 mcs; u8 nss; u8 bw; u8 he_gi; @@ -2454,6 +2469,7 @@ struct cfg80211_scan_info { * @short_ssid_valid: @short_ssid is valid and can be used * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait * 20 TUs before starting to send probe requests. + * @psd_20: The AP's 20 MHz PSD value. */ struct cfg80211_scan_6ghz_params { u32 short_ssid; @@ -2462,6 +2478,7 @@ struct cfg80211_scan_6ghz_params { bool unsolicited_probe; bool short_ssid_valid; bool psc_no_listen; + s8 psd_20; }; /** @@ -2708,6 +2725,7 @@ enum cfg80211_signal_type { * the BSS that requested the scan in which the beacon/probe was received. * @chains: bitmask for filled values in @chain_signal. * @chain_signal: per-chain signal strength of last received BSS in dBm. + * @drv_data: Data to be passed through to @inform_bss */ struct cfg80211_inform_bss { struct ieee80211_channel *chan; @@ -2718,6 +2736,8 @@ struct cfg80211_inform_bss { u8 parent_bssid[ETH_ALEN] __aligned(2); u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; + + void *drv_data; }; /** @@ -2870,11 +2890,14 @@ struct cfg80211_auth_request { * if this is %NULL for a link, that link is not requested * @elems: extra elements for the per-STA profile for this link * @elems_len: length of the elements + * @disabled: If set this link should be included during association etc. but it + * should not be used until enabled by the AP MLD. */ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; + bool disabled; }; /** @@ -4086,6 +4109,13 @@ struct mgmt_frame_regs { * * @change_bss: Modify parameters for a given BSS. * + * @inform_bss: Called by cfg80211 while being informed about new BSS data + * for every BSS found within the reported data or frame. This is called + * from within the cfg8011 inform_bss handlers while holding the bss_lock. + * The data parameter is passed through from drv_data inside + * struct cfg80211_inform_bss. + * The new IE data for the BSS is explicitly passed. + * * @set_txq_params: Set TX queue parameters * * @libertas_set_mesh_channel: Only for backward compatibility for libertas, @@ -4473,6 +4503,9 @@ struct cfg80211_ops { int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); + void (*inform_bss)(struct wiphy *wiphy, struct cfg80211_bss *bss, + const struct cfg80211_bss_ies *ies, void *data); + int (*set_txq_params)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params); @@ -4592,9 +4625,10 @@ struct cfg80211_ops { struct cfg80211_gtk_rekey_data *data); int (*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 action_code, u8 dialog_token, - u16 status_code, u32 peer_capability, - bool initiator, const u8 *buf, size_t len); + const u8 *peer, int link_id, + u8 action_code, u8 dialog_token, u16 status_code, + u32 peer_capability, bool initiator, + const u8 *buf, size_t len); int (*tdls_oper)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); @@ -5724,12 +5758,17 @@ struct cfg80211_cqm_config; * wiphy_lock - lock the wiphy * @wiphy: the wiphy to lock * - * This is mostly exposed so it can be done around registering and - * unregistering netdevs that aren't created through cfg80211 calls, - * since that requires locking in cfg80211 when the notifiers is - * called, but that cannot differentiate which way it's called. + * This is needed around registering and unregistering netdevs that + * aren't created through cfg80211 calls, since that requires locking + * in cfg80211 when the notifiers is called, but that cannot + * differentiate which way it's called. + * + * It can also be used by drivers for their own purposes. * * When cfg80211 ops are called, the wiphy is already locked. + * + * Note that this makes sure that no workers that have been queued + * with wiphy_queue_work() are running. */ static inline void wiphy_lock(struct wiphy *wiphy) __acquires(&wiphy->mtx) @@ -5749,6 +5788,88 @@ static inline void wiphy_unlock(struct wiphy *wiphy) mutex_unlock(&wiphy->mtx); } +struct wiphy_work; +typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *); + +struct wiphy_work { + struct list_head entry; + wiphy_work_func_t func; +}; + +static inline void wiphy_work_init(struct wiphy_work *work, + wiphy_work_func_t func) +{ + INIT_LIST_HEAD(&work->entry); + work->func = func; +} + +/** + * wiphy_work_queue - queue work for the wiphy + * @wiphy: the wiphy to queue for + * @work: the work item + * + * This is useful for work that must be done asynchronously, and work + * queued here has the special property that the wiphy mutex will be + * held as if wiphy_lock() was called, and that it cannot be running + * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can + * use just cancel_work() instead of cancel_work_sync(), it requires + * being in a section protected by wiphy_lock(). + */ +void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work); + +/** + * wiphy_work_cancel - cancel previously queued work + * @wiphy: the wiphy, for debug purposes + * @work: the work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work); + +struct wiphy_delayed_work { + struct wiphy_work work; + struct wiphy *wiphy; + struct timer_list timer; +}; + +void wiphy_delayed_work_timer(struct timer_list *t); + +static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, + wiphy_work_func_t func) +{ + timer_setup(&dwork->timer, wiphy_delayed_work_timer, 0); + wiphy_work_init(&dwork->work, func); +} + +/** + * wiphy_delayed_work_queue - queue delayed work for the wiphy + * @wiphy: the wiphy to queue for + * @dwork: the delayable worker + * @delay: number of jiffies to wait before queueing + * + * This is useful for work that must be done asynchronously, and work + * queued here has the special property that the wiphy mutex will be + * held as if wiphy_lock() was called, and that it cannot be running + * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can + * use just cancel_work() instead of cancel_work_sync(), it requires + * being in a section protected by wiphy_lock(). + */ +void wiphy_delayed_work_queue(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork, + unsigned long delay); + +/** + * wiphy_delayed_work_cancel - cancel previously queued delayed work + * @wiphy: the wiphy, for debug purposes + * @dwork: the delayed work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_delayed_work_cancel(struct wiphy *wiphy, + struct wiphy_delayed_work *dwork); + /** * struct wireless_dev - wireless device state * @@ -6561,6 +6682,28 @@ cfg80211_find_vendor_ie(unsigned int oui, int oui_type, } /** + * cfg80211_defragment_element - Defrag the given element data into a buffer + * + * @elem: the element to defragment + * @ies: elements where @elem is contained + * @ieslen: length of @ies + * @data: buffer to store element data + * @data_len: length of @data + * @frag_id: the element ID of fragments + * + * Return: length of @data, or -EINVAL on error + * + * Copy out all data from an element that may be fragmented into @data, while + * skipping all headers. + * + * The function uses memmove() internally. It is acceptable to defragment an + * element in-place. + */ +ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies, + size_t ieslen, u8 *data, size_t data_len, + u8 frag_id); + +/** * cfg80211_send_layer2_update - send layer 2 update frame * * @dev: network device @@ -9067,4 +9210,17 @@ static inline int cfg80211_color_change_notify(struct net_device *dev) bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, const struct cfg80211_chan_def *chandef); +/** + * cfg80211_links_removed - Notify about removed STA MLD setup links. + * @dev: network device. + * @link_mask: BIT mask of removed STA MLD setup link IDs. + * + * Inform cfg80211 and the userspace about removed STA MLD setup links due to + * AP MLD removing the corresponding affiliated APs with Multi-Link + * reconfiguration. Note that it's not valid to remove all links, in this + * case disconnect instead. + * Also note that the wdev mutex must be held. + */ +void cfg80211_links_removed(struct net_device *dev, u16 link_mask); + #endif /* __NET_CFG80211_H */ diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 0c2778a836db..f79ce133e51a 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -170,7 +170,8 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) } /** - * @WPAN_PHY_FLAG_TRANSMIT_POWER: Indicates that transceiver will support + * enum wpan_phy_flags - WPAN PHY state flags + * @WPAN_PHY_FLAG_TXPOWER: Indicates that transceiver will support * transmit power setting. * @WPAN_PHY_FLAG_CCA_ED_LEVEL: Indicates that transceiver will support cca ed * level setting. @@ -178,12 +179,15 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) * setting. * @WPAN_PHY_FLAG_STATE_QUEUE_STOPPED: Indicates that the transmit queue was * temporarily stopped. + * @WPAN_PHY_FLAG_DATAGRAMS_ONLY: Indicates that transceiver is only able to + * send/receive datagrams. */ enum wpan_phy_flags { WPAN_PHY_FLAG_TXPOWER = BIT(1), WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2), WPAN_PHY_FLAG_CCA_MODE = BIT(3), WPAN_PHY_FLAG_STATE_QUEUE_STOPPED = BIT(4), + WPAN_PHY_FLAG_DATAGRAMS_ONLY = BIT(5), }; struct wpan_phy { diff --git a/include/net/codel.h b/include/net/codel.h index 5fed2f16cb8d..aa80f744826c 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -145,8 +145,8 @@ struct codel_vars { * @maxpacket: largest packet we've seen so far * @drop_count: temp count of dropped packets in dequeue() * @drop_len: bytes of dropped packets in dequeue() - * ecn_mark: number of packets we ECN marked instead of dropping - * ce_mark: number of packets CE marked because sojourn time was above ce_threshold + * @ecn_mark: number of packets we ECN marked instead of dropping + * @ce_mark: number of packets CE marked because sojourn time was above ce_threshold */ struct codel_stats { u32 maxpacket; diff --git a/include/net/devlink.h b/include/net/devlink.h index 6a942e70e451..0cdb4b16e5b5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -123,6 +123,7 @@ struct devlink_port { struct list_head list; struct list_head region_list; struct devlink *devlink; + const struct devlink_port_ops *ops; unsigned int index; spinlock_t type_lock; /* Protects type and type_eth/ib * structures consistency. @@ -220,7 +221,7 @@ struct devlink_dpipe_field { /** * struct devlink_dpipe_header - dpipe header object * @name: header name - * @id: index, global/local detrmined by global bit + * @id: index, global/local determined by global bit * @fields: fields * @fields_count: number of fields * @global: indicates if header is shared like most protocol header @@ -240,7 +241,7 @@ struct devlink_dpipe_header { * @header_index: header index (packets can have several headers of same * type like in case of tunnels) * @header: header - * @fieled_id: field index + * @field_id: field index */ struct devlink_dpipe_match { enum devlink_dpipe_match_type type; @@ -255,7 +256,7 @@ struct devlink_dpipe_match { * @header_index: header index (packets can have several headers of same * type like in case of tunnels) * @header: header - * @fieled_id: field index + * @field_id: field index */ struct devlink_dpipe_action { enum devlink_dpipe_action_type type; @@ -291,7 +292,7 @@ struct devlink_dpipe_value { * struct devlink_dpipe_entry - table entry object * @index: index of the entry in the table * @match_values: match values - * @matche_values_count: count of matches tuples + * @match_values_count: count of matches tuples * @action_values: actions values * @action_values_count: count of actions values * @counter: value of counter @@ -341,7 +342,9 @@ struct devlink_dpipe_table_ops; */ struct devlink_dpipe_table { void *priv; + /* private: */ struct list_head list; + /* public: */ const char *name; bool counters_enabled; bool counter_control_extern; @@ -354,13 +357,13 @@ struct devlink_dpipe_table { /** * struct devlink_dpipe_table_ops - dpipe_table ops - * @actions_dump - dumps all tables actions - * @matches_dump - dumps all tables matches - * @entries_dump - dumps all active entries in the table - * @counters_set_update - when changing the counter status hardware sync + * @actions_dump: dumps all tables actions + * @matches_dump: dumps all tables matches + * @entries_dump: dumps all active entries in the table + * @counters_set_update: when changing the counter status hardware sync * maybe needed to allocate/free counter related * resources - * @size_get - get size + * @size_get: get size */ struct devlink_dpipe_table_ops { int (*actions_dump)(void *priv, struct sk_buff *skb); @@ -373,8 +376,8 @@ struct devlink_dpipe_table_ops { /** * struct devlink_dpipe_headers - dpipe headers - * @headers - header array can be shared (global bit) or driver specific - * @headers_count - count of headers + * @headers: header array can be shared (global bit) or driver specific + * @headers_count: count of headers */ struct devlink_dpipe_headers { struct devlink_dpipe_header **headers; @@ -386,7 +389,7 @@ struct devlink_dpipe_headers { * @size_min: minimum size which can be set * @size_max: maximum size which can be set * @size_granularity: size granularity - * @size_unit: resource's basic unit + * @unit: resource's basic unit */ struct devlink_resource_size_params { u64 size_min; @@ -456,6 +459,7 @@ struct devlink_flash_notify { /** * struct devlink_param - devlink configuration parameter data + * @id: devlink parameter id number * @name: name of the parameter * @generic: indicates if the parameter is generic or driver specific * @type: parameter type @@ -631,6 +635,7 @@ enum devlink_param_generic_id { * struct devlink_flash_update_params - Flash Update parameters * @fw: pointer to the firmware data to update from * @component: the flash component to update + * @overwrite_mask: which types of flash update are supported (may be %0) * * With the exception of fw, drivers must opt-in to parameters by * setting the appropriate bit in the supported_flash_update_params field in @@ -1261,7 +1266,7 @@ struct devlink_ops { /** * @supported_flash_update_params: * mask of parameters supported by the driver's .flash_update - * implemementation. + * implementation. */ u32 supported_flash_update_params; unsigned long reload_actions; @@ -1273,12 +1278,6 @@ struct devlink_ops { int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action, enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack); - int (*port_type_set)(struct devlink_port *devlink_port, - enum devlink_port_type port_type); - int (*port_split)(struct devlink *devlink, struct devlink_port *port, - unsigned int count, struct netlink_ext_ack *extack); - int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, - struct netlink_ext_ack *extack); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); @@ -1435,80 +1434,17 @@ struct devlink_ops { const struct devlink_trap_policer *policer, u64 *p_drops); /** - * @port_function_hw_addr_get: Port function's hardware address get function. - * - * Should be used by device drivers to report the hardware address of a function managed - * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port - * function handling for a particular port. - * - * Note: @extack can be NULL when port notifier queries the port function. - */ - int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, - int *hw_addr_len, - struct netlink_ext_ack *extack); - /** - * @port_function_hw_addr_set: Port function's hardware address set function. - * - * Should be used by device drivers to set the hardware address of a function managed - * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port - * function handling for a particular port. - */ - int (*port_function_hw_addr_set)(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack); - /** - * @port_fn_roce_get: Port function's roce get function. - * - * Query RoCE state of a function managed by the devlink port. - * Return -EOPNOTSUPP if port function RoCE handling is not supported. - */ - int (*port_fn_roce_get)(struct devlink_port *devlink_port, - bool *is_enable, - struct netlink_ext_ack *extack); - /** - * @port_fn_roce_set: Port function's roce set function. - * - * Enable/Disable the RoCE state of a function managed by the devlink - * port. - * Return -EOPNOTSUPP if port function RoCE handling is not supported. - */ - int (*port_fn_roce_set)(struct devlink_port *devlink_port, - bool enable, struct netlink_ext_ack *extack); - /** - * @port_fn_migratable_get: Port function's migratable get function. - * - * Query migratable state of a function managed by the devlink port. - * Return -EOPNOTSUPP if port function migratable handling is not - * supported. - */ - int (*port_fn_migratable_get)(struct devlink_port *devlink_port, - bool *is_enable, - struct netlink_ext_ack *extack); - /** - * @port_fn_migratable_set: Port function's migratable set function. - * - * Enable/Disable migratable state of a function managed by the devlink - * port. - * Return -EOPNOTSUPP if port function migratable handling is not - * supported. - */ - int (*port_fn_migratable_set)(struct devlink_port *devlink_port, - bool enable, - struct netlink_ext_ack *extack); - /** * port_new() - Add a new port function of a specified flavor * @devlink: Devlink instance * @attrs: attributes of the new port * @extack: extack for reporting error messages - * @new_port_index: index of the new port + * @devlink_port: pointer to store new devlink port pointer * * Devlink core will call this device driver function upon user request * to create a new port function of a specified flavor and optional * attributes * * Notes: - * - Called without devlink instance lock being held. Drivers must - * implement own means of synchronization * - On success, drivers must register a port with devlink core * * Return: 0 on success, negative value otherwise. @@ -1516,56 +1452,7 @@ struct devlink_ops { int (*port_new)(struct devlink *devlink, const struct devlink_port_new_attrs *attrs, struct netlink_ext_ack *extack, - unsigned int *new_port_index); - /** - * port_del() - Delete a port function - * @devlink: Devlink instance - * @port_index: port function index to delete - * @extack: extack for reporting error messages - * - * Devlink core will call this device driver function upon user request - * to delete a previously created port function - * - * Notes: - * - Called without devlink instance lock being held. Drivers must - * implement own means of synchronization - * - On success, drivers must unregister the corresponding devlink - * port - * - * Return: 0 on success, negative value otherwise. - */ - int (*port_del)(struct devlink *devlink, unsigned int port_index, - struct netlink_ext_ack *extack); - /** - * port_fn_state_get() - Get the state of a port function - * @devlink: Devlink instance - * @port: The devlink port - * @state: Admin configured state - * @opstate: Current operational state - * @extack: extack for reporting error messages - * - * Reports the admin and operational state of a devlink port function - * - * Return: 0 on success, negative value otherwise. - */ - int (*port_fn_state_get)(struct devlink_port *port, - enum devlink_port_fn_state *state, - enum devlink_port_fn_opstate *opstate, - struct netlink_ext_ack *extack); - /** - * port_fn_state_set() - Set the admin state of a port function - * @devlink: Devlink instance - * @port: The devlink port - * @state: Admin state - * @extack: extack for reporting error messages - * - * Set the admin state of a devlink port function - * - * Return: 0 on success, negative value otherwise. - */ - int (*port_fn_state_set)(struct devlink_port *port, - enum devlink_port_fn_state state, - struct netlink_ext_ack *extack); + struct devlink_port **devlink_port); /** * Rate control callbacks. @@ -1655,15 +1542,116 @@ void devl_unregister(struct devlink *devlink); void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_free(struct devlink *devlink); + +/** + * struct devlink_port_ops - Port operations + * @port_split: Callback used to split the port into multiple ones. + * @port_unsplit: Callback used to unsplit the port group back into + * a single port. + * @port_type_set: Callback used to set a type of a port. + * @port_del: Callback used to delete selected port along with related function. + * Devlink core calls this upon user request to delete + * a port previously created by devlink_ops->port_new(). + * @port_fn_hw_addr_get: Callback used to set port function's hardware address. + * Should be used by device drivers to report + * the hardware address of a function managed + * by the devlink port. + * @port_fn_hw_addr_set: Callback used to set port function's hardware address. + * Should be used by device drivers to set the hardware + * address of a function managed by the devlink port. + * @port_fn_roce_get: Callback used to get port function's RoCE capability. + * Should be used by device drivers to report + * the current state of RoCE capability of a function + * managed by the devlink port. + * @port_fn_roce_set: Callback used to set port function's RoCE capability. + * Should be used by device drivers to enable/disable + * RoCE capability of a function managed + * by the devlink port. + * @port_fn_migratable_get: Callback used to get port function's migratable + * capability. Should be used by device drivers + * to report the current state of migratable capability + * of a function managed by the devlink port. + * @port_fn_migratable_set: Callback used to set port function's migratable + * capability. Should be used by device drivers + * to enable/disable migratable capability of + * a function managed by the devlink port. + * @port_fn_state_get: Callback used to get port function's state. + * Should be used by device drivers to report + * the current admin and operational state of a + * function managed by the devlink port. + * @port_fn_state_set: Callback used to get port function's state. + * Should be used by device drivers set + * the admin state of a function managed + * by the devlink port. + * + * Note: Driver should return -EOPNOTSUPP if it doesn't support + * port function (@port_fn_*) handling for a particular port. + */ +struct devlink_port_ops { + int (*port_split)(struct devlink *devlink, struct devlink_port *port, + unsigned int count, struct netlink_ext_ack *extack); + int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack); + int (*port_type_set)(struct devlink_port *devlink_port, + enum devlink_port_type port_type); + int (*port_del)(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack); + int (*port_fn_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, + struct netlink_ext_ack *extack); + int (*port_fn_hw_addr_set)(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack); + int (*port_fn_roce_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + int (*port_fn_roce_set)(struct devlink_port *devlink_port, + bool enable, struct netlink_ext_ack *extack); + int (*port_fn_migratable_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + int (*port_fn_migratable_set)(struct devlink_port *devlink_port, + bool enable, + struct netlink_ext_ack *extack); + int (*port_fn_state_get)(struct devlink_port *port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack); + int (*port_fn_state_set)(struct devlink_port *port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack); +}; + void devlink_port_init(struct devlink *devlink, struct devlink_port *devlink_port); void devlink_port_fini(struct devlink_port *devlink_port); -int devl_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index); -int devlink_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index); + +int devl_port_register_with_ops(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index, + const struct devlink_port_ops *ops); + +static inline int devl_port_register(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index) +{ + return devl_port_register_with_ops(devlink, devlink_port, + port_index, NULL); +} + +int devlink_port_register_with_ops(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index, + const struct devlink_port_ops *ops); + +static inline int devlink_port_register(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index) +{ + return devlink_port_register_with_ops(devlink, devlink_port, + port_index, NULL); +} + void devl_port_unregister(struct devlink_port *devlink_port); void devlink_port_unregister(struct devlink_port *devlink_port); void devlink_port_type_eth_set(struct devlink_port *devlink_port); diff --git a/include/net/dsa.h b/include/net/dsa.h index ab0f0a5b0860..d309ee7ed04b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -314,9 +314,17 @@ struct dsa_port { struct list_head fdbs; struct list_head mdbs; - /* List of VLANs that CPU and DSA ports are members of. */ struct mutex vlans_lock; - struct list_head vlans; + union { + /* List of VLANs that CPU and DSA ports are members of. + * Access to this is serialized by the sleepable @vlans_lock. + */ + struct list_head vlans; + /* List of VLANs that user ports are members of. + * Access to this is serialized by netif_addr_lock_bh(). + */ + struct list_head user_vlans; + }; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, @@ -867,9 +875,15 @@ struct dsa_switch_ops { phy_interface_t iface); int (*phylink_mac_link_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); + int (*phylink_mac_prepare)(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface); void (*phylink_mac_config)(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); + int (*phylink_mac_finish)(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface); void (*phylink_mac_an_restart)(struct dsa_switch *ds, int port); void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, unsigned int mode, diff --git a/include/net/flow.h b/include/net/flow.h index bb8651a6eaa7..7f0adda3bf2f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -116,11 +116,10 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, } /* Reset some input parameters after previous lookup */ -static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos, +static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __be32 daddr, __be32 saddr) { fl4->flowi4_oif = oif; - fl4->flowi4_tos = tos; fl4->daddr = daddr; fl4->saddr = saddr; } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 85b2281576ed..8664ed4fbbdf 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -243,10 +243,12 @@ struct flow_dissector_key_ip { * struct flow_dissector_key_meta: * @ingress_ifindex: ingress ifindex * @ingress_iftype: ingress interface type + * @l2_miss: packet did not match an L2 entry during forwarding */ struct flow_dissector_key_meta { int ingress_ifindex; u16 ingress_iftype; + u8 l2_miss; }; /** @@ -299,6 +301,26 @@ struct flow_dissector_key_l2tpv3 { __be32 session_id; }; +/** + * struct flow_dissector_key_cfm + * @mdl_ver: maintenance domain level (mdl) and cfm protocol version + * @opcode: code specifying a type of cfm protocol packet + * + * See 802.1ag, ITU-T G.8013/Y.1731 + * 1 2 + * |7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | mdl | version | opcode | + * +-----+---------+-+-+-+-+-+-+-+-+ + */ +struct flow_dissector_key_cfm { + u8 mdl_ver; + u8 opcode; +}; + +#define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) +#define FLOW_DIS_CFM_MDL_MAX 7 + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -331,6 +353,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */ FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */ FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ + FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/gro.h b/include/net/gro.h index a4fab706240d..88644b3ca660 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -11,11 +11,23 @@ #include <net/udp.h> struct napi_gro_cb { - /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ - void *frag0; + union { + struct { + /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ + void *frag0; - /* Length of frag0. */ - unsigned int frag0_len; + /* Length of frag0. */ + unsigned int frag0_len; + }; + + struct { + /* used in skb_gro_receive() slow path */ + struct sk_buff *last; + + /* jiffies when first packet was created/queued */ + unsigned long age; + }; + }; /* This indicates where we are processing relative to skb->data. */ int data_offset; @@ -32,9 +44,6 @@ struct napi_gro_cb { /* Used in ipv6_gro_receive() and foo-over-udp */ u16 proto; - /* jiffies when first packet was created/queued */ - unsigned long age; - /* Used in napi_gro_cb::free */ #define NAPI_GRO_FREE 1 #define NAPI_GRO_FREE_STOLEN_HEAD 2 @@ -77,9 +86,6 @@ struct napi_gro_cb { /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; - - /* used in skb_gro_receive() slow path */ - struct sk_buff *last; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) @@ -446,5 +452,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, gro_normal_list(napi); } +/* This function is the alternative of 'inet_iif' and 'inet_sdif' + * functions in case we can not rely on fields of IPCB. + * + * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. + * The caller must hold the RCU read lock. + */ +static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) +{ + *iif = inet_iif(skb) ?: skb->dev->ifindex; + *sdif = 0; + +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (netif_is_l3_slave(skb->dev)) { + struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); + + *sdif = *iif; + *iif = master ? master->ifindex : 0; + } +#endif +} + +/* This function is the alternative of 'inet6_iif' and 'inet6_sdif' + * functions in case we can not rely on fields of IP6CB. + * + * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. + * The caller must hold the RCU read lock. + */ +static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) +{ + /* using skb->dev->ifindex because skb_dst(skb) is not initialized */ + *iif = skb->dev->ifindex; + *sdif = 0; + +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (netif_is_l3_slave(skb->dev)) { + struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); + + *sdif = *iif; + *iif = master ? master->ifindex : 0; + } +#endif +} + +extern struct list_head offload_base; #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/gso.h b/include/net/gso.h new file mode 100644 index 000000000000..29975440cad5 --- /dev/null +++ b/include/net/gso.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_GSO_H +#define _NET_GSO_H + +#include <linux/skbuff.h> + +/* Keeps track of mac header offset relative to skb->head. + * It is useful for TSO of Tunneling protocol. e.g. GRE. + * For non-tunnel skb it points to skb_mac_header() and for + * tunnel skb it points to outer mac header. + * Keeps track of level of encapsulation of network headers. + */ +struct skb_gso_cb { + union { + int mac_offset; + int data_offset; + }; + int encap_level; + __wsum csum; + __u16 csum_start; +}; +#define SKB_GSO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) + +static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) +{ + return (skb_mac_header(inner_skb) - inner_skb->head) - + SKB_GSO_CB(inner_skb)->mac_offset; +} + +static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) +{ + int new_headroom, headroom; + int ret; + + headroom = skb_headroom(skb); + ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); + if (ret) + return ret; + + new_headroom = skb_headroom(skb); + SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); + return 0; +} + +static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) +{ + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; +} + +/* Compute the checksum for a gso segment. First compute the checksum value + * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and + * then add in skb->csum (checksum from csum_start to end of packet). + * skb->csum and csum_start are then updated to reflect the checksum of the + * resultant packet starting from the transport header-- the resultant checksum + * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo + * header. + */ +static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) +{ + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; + + return csum_fold(csum_partial(csum_start, plen, partial)); +} + +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path); + +static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + return __skb_gso_segment(skb, features, true); +} + +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type); + +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features); + +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); + +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); + +static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, + int pulled_hlen, u16 mac_offset, + int mac_len) +{ + skb->protocol = protocol; + skb->encapsulation = 1; + skb_push(skb, pulled_hlen); + skb_reset_transport_header(skb); + skb->mac_header = mac_offset; + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; +} + +#endif /* _NET_GSO_H */ diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index f980a72f2ce6..c4722a9963de 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -535,6 +535,8 @@ enum ieee80211_radiotap_eht_usig_common { IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN = 0x00000008, IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN = 0x00000010, IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC = 0x00000020, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_CHECKED = 0x00000040, + IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK = 0x00000080, IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000, IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000, diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index da8a3e648c7a..063313df447d 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -74,6 +74,10 @@ struct ieee802154_beacon_hdr { #endif } __packed; +struct ieee802154_mac_cmd_pl { + u8 cmd_id; +} __packed; + struct ieee802154_sechdr { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 level:3, @@ -149,6 +153,16 @@ struct ieee802154_beacon_frame { struct ieee802154_beacon_hdr mac_pl; }; +struct ieee802154_mac_cmd_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; +}; + +struct ieee802154_beacon_req_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; +}; + /* pushes hdr onto the skb. fields of hdr->fc that can be calculated from * the contents of hdr will be, and the actual value of those bits in * hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame @@ -174,9 +188,13 @@ int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, */ int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr); -/* pushes a beacon frame into an skb */ +/* pushes/pulls various frame types into/from an skb */ int ieee802154_beacon_push(struct sk_buff *skb, struct ieee802154_beacon_frame *beacon); +int ieee802154_mac_cmd_push(struct sk_buff *skb, void *frame, + const void *pl, unsigned int pl_len); +int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb, + struct ieee802154_mac_cmd_pl *mac_pl); int ieee802154_max_payload(const struct ieee802154_hdr *hdr); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index cec453c18f1d..b86b8e21de7f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -31,10 +31,11 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern); +void __inet_accept(struct socket *sock, struct socket *newsock, + struct sock *newsk); int inet_send_prepare(struct sock *sk); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); -ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags); +void inet_splice_eof(struct socket *sock); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int inet_shutdown(struct socket *sock, int how); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 325ad893f624..153960663ce4 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -29,7 +29,7 @@ struct fqdir { }; /** - * fragment queue flags + * enum: fragment queue flags * * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index caa20a905531..491ceb7ebe5d 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) + u32 mark = READ_ONCE(sk->sk_mark); + + if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; - return sk->sk_mark; + return mark; } static inline int inet_request_bound_dev_if(const struct sock *sk, @@ -221,8 +222,8 @@ struct inet_sock { __s16 uc_ttl; __u16 cmsg_flags; struct ip_options_rcu __rcu *inet_opt; + atomic_t inet_id; __be16 inet_sport; - __u16 inet_id; __u8 tos; __u8 min_ttl; diff --git a/include/net/ip.h b/include/net/ip.h index acec504c469a..19adacd5ece0 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, { ipcm_init(ipcm); - ipcm->sockc.mark = inet->sk.sk_mark; + ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; @@ -222,8 +222,6 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4, unsigned int flags); int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); -ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, - int offset, size_t size, int flags); struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); @@ -244,14 +242,22 @@ static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base); } -static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) +/* Get the route scope that should be used when sending a packet. */ +static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, + const struct ipcm_cookie *ipc, + const struct msghdr *msg) { - return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos); + if (sock_flag(&inet->sk, SOCK_LOCALROUTE) || + msg->msg_flags & MSG_DONTROUTE || + (ipc->opt && ipc->opt->opt.is_strictroute)) + return RT_SCOPE_LINK; + + return RT_SCOPE_UNIVERSE; } -static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk) +static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) { - return (ipc->tos != -1) ? RT_CONN_FLAGS_TOS(sk, ipc->tos) : RT_CONN_FLAGS(sk); + return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos); } /* datagram.c */ @@ -282,7 +288,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, - unsigned int len, u64 transmit_time); + unsigned int len, u64 transmit_time, u32 txhash); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) @@ -532,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, * generator as much as we can. */ if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; + int val; + + /* avoid atomic operations for TCP, + * as we hold socket lock at this point. + */ + if (sk_is_tcp(sk)) { + sock_owned_by_me(sk); + val = atomic_read(&inet_sk(sk)->inet_id); + atomic_set(&inet_sk(sk)->inet_id, val + segs); + } else { + val = atomic_add_return(segs, &inet_sk(sk)->inet_id); + } + iph->id = htons(val); return; } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7332296eca44..2acc4c808d45 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -752,12 +752,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a) /* more secured version of ipv6_addr_hash() */ static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { - u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; - - return jhash_3words(v, - (__force u32)a->s6_addr32[2], - (__force u32)a->s6_addr32[3], - initval); + return jhash2((__force const u32 *)a->s6_addr32, + ARRAY_SIZE(a->s6_addr32), initval); } static inline bool ipv6_addr_loopback(const struct in6_addr *a) diff --git a/include/net/kcm.h b/include/net/kcm.h index 2d704f8f4905..90279e5e09a5 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -47,9 +47,9 @@ struct kcm_stats { struct kcm_tx_msg { unsigned int sent; - unsigned int fragidx; unsigned int frag_offset; unsigned int msg_flags; + bool started_tx; struct sk_buff *frag_skb; struct sk_buff *last_skb; }; diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 2c1ea3414640..374411b3066c 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -111,7 +111,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, u16 *how_many_unacked); struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, - struct llc_addr *laddr); + struct llc_addr *laddr, const struct net *net); void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk); void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk); diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 49aa79c7b278..7e73f8e5e497 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -269,7 +269,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) /** * llc_pdu_decode_da - extracts dest address of input frame * @skb: input skb that destination address must be extracted from it - * @sa: pointer to destination address (6 byte array). + * @da: pointer to destination address (6 byte array). * * This function extracts destination address(MAC) of input frame. */ @@ -321,7 +321,7 @@ static inline void llc_pdu_init_as_ui_cmd(struct sk_buff *skb) /** * llc_pdu_init_as_test_cmd - sets PDU as TEST - * @skb - Address of the skb to build + * @skb: Address of the skb to build * * Sets a PDU as TEST */ @@ -369,6 +369,8 @@ struct llc_xid_info { /** * llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID * @skb: input skb that header must be set into it. + * @svcs_supported: The class of the LLC (I or II) + * @rx_window: The size of the receive window of the LLC * * This function sets third,fourth,fifth and sixth bytes of LLC header as * a XID PDU. diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ac0370e76874..2a55ae932c56 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation */ #ifndef MAC80211_H @@ -1755,12 +1755,15 @@ struct ieee80211_channel_switch { * @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes * and send P2P_PS notification to the driver if NOA changed, even * this is not pure P2P vif. + * @IEEE80211_VIF_DISABLE_SMPS_OVERRIDE: disable user configuration of + * SMPS mode via debugfs. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), + IEEE80211_VIF_DISABLE_SMPS_OVERRIDE = BIT(4), }; @@ -1790,6 +1793,9 @@ enum ieee80211_offload_flags { * @ps: power-save mode (STA only). This flag is NOT affected by * offchannel/dynamic_ps operations. * @aid: association ID number, valid only when @assoc is true + * @eml_cap: EML capabilities as described in P802.11be_D2.2 Figure 9-1002k. + * @eml_med_sync_delay: Medium Synchronization delay as described in + * P802.11be_D2.2 Figure 9-1002j. * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The * may filter ARP queries targeted for other addresses than listed here. * The driver must allow ARP queries targeted for all address listed here @@ -1812,6 +1818,8 @@ struct ieee80211_vif_cfg { bool ibss_creator; bool ps; u16 aid; + u16 eml_cap; + u16 eml_med_sync_delay; __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; int arp_addr_cnt; @@ -1838,6 +1846,8 @@ struct ieee80211_vif_cfg { * @active_links: The bitmap of active links, or 0 for non-MLO. * The driver shouldn't change this directly, but use the * API calls meant for that purpose. + * @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO. + * Must be a subset of valid_links. * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively @@ -1875,7 +1885,7 @@ struct ieee80211_vif { struct ieee80211_vif_cfg cfg; struct ieee80211_bss_conf bss_conf; struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS]; - u16 valid_links, active_links; + u16 valid_links, active_links, dormant_links; u8 addr[ETH_ALEN] __aligned(2); bool p2p; @@ -1901,6 +1911,27 @@ struct ieee80211_vif { u8 drv_priv[] __aligned(sizeof(void *)); }; +/** + * ieee80211_vif_usable_links - Return the usable links for the vif + * @vif: the vif for which the usable links are requested + * Return: the usable link bitmap + */ +static inline u16 ieee80211_vif_usable_links(const struct ieee80211_vif *vif) +{ + return vif->valid_links & ~vif->dormant_links; +} + +/** + * ieee80211_vif_is_mld - Returns true iff the vif is an MLD one + * @vif: the vif + * Return: %true if the vif is an MLD, %false otherwise. + */ +static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif) +{ + /* valid_links != 0 indicates this vif is an MLD */ + return vif->valid_links != 0; +} + #define for_each_vif_active_link(vif, link, link_id) \ for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ if ((!(vif)->active_links || \ @@ -3842,7 +3873,7 @@ struct ieee80211_prep_tx_info { * * @link_sta_add_debugfs: Drivers can use this callback to add debugfs files * when a link is added to a mac80211 station. This callback - * should be within a CPTCFG_MAC80211_DEBUGFS conditional. This + * should be within a CONFIG_MAC80211_DEBUGFS conditional. This * callback can sleep. * For non-MLO the callback will be called once for the deflink with the * station's directory rather than a separate subdirectory. @@ -5251,7 +5282,8 @@ struct ieee80211_mutable_offsets { * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @offs: &struct ieee80211_mutable_offsets pointer to struct that will * receive the offsets that may be updated by the driver. - * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP) + * @link_id: the link id to which the beacon belongs (or 0 for an AP STA + * that is not associated with AP MLD). * * If the driver implements beaconing modes, it must use this function to * obtain the beacon template. @@ -5348,7 +5380,8 @@ void ieee80211_beacon_free_ema_list(struct ieee80211_ema_beacons *ema_beacons); * @tim_length: pointer to variable that will receive the TIM IE length, * (including the ID and length bytes!). * Set to 0 if invalid (in non-AP modes). - * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP) + * @link_id: the link id to which the beacon belongs (or 0 for an AP STA + * that is not associated with AP MLD). * * If the driver implements beaconing modes, it must use this function to * obtain the beacon frame. @@ -5371,7 +5404,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, * ieee80211_beacon_get - beacon generation function * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @link_id: the link id to which the beacon belongs (or 0 for a non-MLD AP) + * @link_id: the link id to which the beacon belongs (or 0 for an AP STA + * that is not associated with AP MLD). * * See ieee80211_beacon_get_tim(). * @@ -6578,6 +6612,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, * marks frames marked in the bitmap as having been filtered. Afterwards, it * checks if any frames in the window starting from @ssn can now be released * (in case they were only waiting for frames that were filtered.) + * (Only work correctly if @max_rx_aggregation_subframes <= 64 frames) */ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, u16 ssn, u64 filtered, @@ -6862,6 +6897,48 @@ ieee80211_vif_type_p2p(struct ieee80211_vif *vif) } /** + * ieee80211_get_he_iftype_cap_vif - return HE capabilities for sband/vif + * @sband: the sband to search for the iftype on + * @vif: the vif to get the iftype from + * + * Return: pointer to the struct ieee80211_sta_he_cap, or %NULL is none found + */ +static inline const struct ieee80211_sta_he_cap * +ieee80211_get_he_iftype_cap_vif(const struct ieee80211_supported_band *sband, + struct ieee80211_vif *vif) +{ + return ieee80211_get_he_iftype_cap(sband, ieee80211_vif_type_p2p(vif)); +} + +/** + * ieee80211_get_he_6ghz_capa_vif - return HE 6 GHz capabilities + * @sband: the sband to search for the STA on + * @vif: the vif to get the iftype from + * + * Return: the 6GHz capabilities + */ +static inline __le16 +ieee80211_get_he_6ghz_capa_vif(const struct ieee80211_supported_band *sband, + struct ieee80211_vif *vif) +{ + return ieee80211_get_he_6ghz_capa(sband, ieee80211_vif_type_p2p(vif)); +} + +/** + * ieee80211_get_eht_iftype_cap_vif - return ETH capabilities for sband/vif + * @sband: the sband to search for the iftype on + * @vif: the vif to get the iftype from + * + * Return: pointer to the struct ieee80211_sta_eht_cap, or %NULL is none found + */ +static inline const struct ieee80211_sta_eht_cap * +ieee80211_get_eht_iftype_cap_vif(const struct ieee80211_supported_band *sband, + struct ieee80211_vif *vif) +{ + return ieee80211_get_eht_iftype_cap(sband, ieee80211_vif_type_p2p(vif)); +} + +/** * ieee80211_update_mu_groups - set the VHT MU-MIMO groud data * * @vif: the specified virtual interface diff --git a/include/net/macsec.h b/include/net/macsec.h index 5b9c61c4d3a6..441ed8fd4b5f 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -8,6 +8,7 @@ #define _NET_MACSEC_H_ #include <linux/u64_stats_sync.h> +#include <linux/if_vlan.h> #include <uapi/linux/if_link.h> #include <uapi/linux/if_macsec.h> @@ -312,4 +313,13 @@ static inline bool macsec_send_sci(const struct macsec_secy *secy) (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); } +static inline void *macsec_netdev_priv(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_VLAN_8021Q) + if (is_vlan_dev(dev)) + return netdev_priv(vlan_dev_priv(dev)->real_dev); +#endif + return netdev_priv(dev); +} + #endif /* _NET_MACSEC_H_ */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9eef19972845..024ad8ddb27e 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -579,7 +579,7 @@ struct mana_fence_rq_resp { }; /* HW DATA */ /* Configure vPort Rx Steering */ -struct mana_cfg_rx_steer_req { +struct mana_cfg_rx_steer_req_v2 { struct gdma_req_hdr hdr; mana_handle_t vport; u16 num_indir_entries; @@ -592,6 +592,8 @@ struct mana_cfg_rx_steer_req { u8 reserved; mana_handle_t default_rxobj; u8 hashkey[MANA_HASH_KEY_SIZE]; + u8 cqe_coalescing_enable; + u8 reserved2[7]; }; /* HW DATA */ struct mana_cfg_rx_steer_resp { diff --git a/include/net/mctp.h b/include/net/mctp.h index 82800d521c3d..da86e106c91d 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -234,9 +234,9 @@ struct mctp_flow { struct mctp_route { mctp_eid_t min, max; - struct mctp_dev *dev; - unsigned int mtu; unsigned char type; + unsigned int mtu; + struct mctp_dev *dev; int (*output)(struct mctp_route *route, struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 0855b60fba17..cf0d81be5a96 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -26,6 +26,15 @@ struct nf_conntrack_expect { struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_mask mask; + /* Usage count. */ + refcount_t use; + + /* Flags */ + unsigned int flags; + + /* Expectation class */ + unsigned int class; + /* Function to call after setup and insertion */ void (*expectfn)(struct nf_conn *new, struct nf_conntrack_expect *this); @@ -39,15 +48,6 @@ struct nf_conntrack_expect { /* Timer function; deletes the expectation. */ struct timer_list timeout; - /* Usage count. */ - refcount_t use; - - /* Flags */ - unsigned int flags; - - /* Expectation class */ - unsigned int class; - #if IS_ENABLED(CONFIG_NF_NAT) union nf_inet_addr saved_addr; /* This is the original per-proto part, used to map the diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 9334371c94e2..f7dd950ff250 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -67,6 +67,9 @@ struct nf_conntrack_tuple { /* The protocol. */ u_int8_t protonum; + /* The direction must be ignored for the tuplehash */ + struct { } __nfct_hash_offsetend; + /* The direction (for tuplehash) */ u_int8_t dir; } dst; diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index f37f9f34430c..d466e1a3b0b1 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -263,8 +263,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, up_write(&flow_table->flow_block_lock); } -int flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); +void flow_offload_route_init(struct flow_offload *flow, + const struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ee47d7143d99..dd40c75011d2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -512,6 +512,7 @@ struct nft_set_elem_expr { * * @list: table set list node * @bindings: list of set bindings + * @refs: internal refcounting for async set destruction * @table: table this set belongs to * @net: netnamespace this set belongs to * @name: name of the set @@ -533,6 +534,7 @@ struct nft_set_elem_expr { * @expr: stateful expression * @ops: set ops * @flags: set flags + * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length @@ -541,6 +543,7 @@ struct nft_set_elem_expr { struct nft_set { struct list_head list; struct list_head bindings; + refcount_t refs; struct nft_table *table; possible_net_t net; char *name; @@ -562,7 +565,8 @@ struct nft_set { struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + dead:1, genmask:2; u8 klen; u8 dlen; @@ -583,6 +587,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline bool nft_set_gc_is_pending(const struct nft_set *s) +{ + return refcount_read(&s->refs) != 1; +} + static inline struct nft_set *nft_set_container_of(const void *priv) { return (void *)priv - offsetof(struct nft_set, data); @@ -596,7 +605,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net, struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, const struct nft_set *set); -void *nft_set_catchall_gc(const struct nft_set *set); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { @@ -813,62 +821,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, void *elem); -/** - * struct nft_set_gc_batch_head - nf_tables set garbage collection batch - * - * @rcu: rcu head - * @set: set the elements belong to - * @cnt: count of elements - */ -struct nft_set_gc_batch_head { - struct rcu_head rcu; - const struct nft_set *set; - unsigned int cnt; -}; - -#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ - sizeof(struct nft_set_gc_batch_head)) / \ - sizeof(void *)) - -/** - * struct nft_set_gc_batch - nf_tables set garbage collection batch - * - * @head: GC batch head - * @elems: garbage collection elements - */ -struct nft_set_gc_batch { - struct nft_set_gc_batch_head head; - void *elems[NFT_SET_GC_BATCH_SIZE]; -}; - -struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, - gfp_t gfp); -void nft_set_gc_batch_release(struct rcu_head *rcu); - -static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) -{ - if (gcb != NULL) - call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); -} - -static inline struct nft_set_gc_batch * -nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, - gfp_t gfp) -{ - if (gcb != NULL) { - if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) - return gcb; - nft_set_gc_batch_complete(gcb); - } - return nft_set_gc_batch_alloc(set, gfp); -} - -static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, - void *elem) -{ - gcb->elems[gcb->head.cnt++] = elem; -} - struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type @@ -1211,6 +1163,29 @@ int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); +static inline bool nft_use_inc(u32 *use) +{ + if (*use == UINT_MAX) + return false; + + (*use)++; + + return true; +} + +static inline void nft_use_dec(u32 *use) +{ + WARN_ON_ONCE((*use)-- == 0); +} + +/* For error and abort path: restore use counter to previous state. */ +static inline void nft_use_inc_restore(u32 *use) +{ + WARN_ON_ONCE(!nft_use_inc(use)); +} + +#define nft_use_dec_restore nft_use_dec + /** * struct nft_table - nf_tables table * @@ -1296,8 +1271,8 @@ struct nft_object { struct list_head list; struct rhlist_head rhlhead; struct nft_object_hash_key key; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; u16 udlen; u8 *udata; @@ -1399,8 +1374,8 @@ struct nft_flowtable { char *name; int hooknum; int ops_len; - u32 genmask:2, - use:30; + u32 genmask:2; + u32 use; u64 handle; /* runtime data below here */ struct list_head hook_list ____cacheline_aligned; @@ -1534,39 +1509,30 @@ static inline void nft_set_elem_change_active(const struct net *net, #endif /* IS_ENABLED(CONFIG_NF_TABLES) */ -/* - * We use a free bit in the genmask field to indicate the element - * is busy, meaning it is currently being processed either by - * the netlink API or GC. - * - * Even though the genmask is only a single byte wide, this works - * because the extension structure if fully constant once initialized, - * so there are no non-atomic write accesses unless it is already - * marked busy. - */ -#define NFT_SET_ELEM_BUSY_MASK (1 << 2) +#define NFT_SET_ELEM_DEAD_MASK (1 << 2) #if defined(__LITTLE_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT 2 +#define NFT_SET_ELEM_DEAD_BIT 2 #elif defined(__BIG_ENDIAN_BITFIELD) -#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) +#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) #else #error #endif -static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) +static inline void nft_set_elem_dead(struct nft_set_ext *ext) { unsigned long *word = (unsigned long *)ext; BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); - return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); + set_bit(NFT_SET_ELEM_DEAD_BIT, word); } -static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) +static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) { unsigned long *word = (unsigned long *)ext; - clear_bit(NFT_SET_ELEM_BUSY_BIT, word); + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_bit(NFT_SET_ELEM_DEAD_BIT, word); } /** @@ -1611,6 +1577,7 @@ struct nft_trans_set { u64 timeout; bool update; bool bound; + u32 size; }; #define nft_trans_set(trans) \ @@ -1625,6 +1592,8 @@ struct nft_trans_set { (((struct nft_trans_set *)trans->data)->timeout) #define nft_trans_set_gc_int(trans) \ (((struct nft_trans_set *)trans->data)->gc_int) +#define nft_trans_set_size(trans) \ + (((struct nft_trans_set *)trans->data)->size) struct nft_trans_chain { struct nft_chain *chain; @@ -1706,6 +1675,38 @@ struct nft_trans_flowtable { #define nft_trans_flowtable_flags(trans) \ (((struct nft_trans_flowtable *)trans->data)->flags) +#define NFT_TRANS_GC_BATCHCOUNT 256 + +struct nft_trans_gc { + struct list_head list; + struct net *net; + struct nft_set *set; + u32 seq; + u8 count; + void *priv[NFT_TRANS_GC_BATCHCOUNT]; + struct rcu_head rcu; +}; + +struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_destroy(struct nft_trans_gc *trans); + +struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc, + unsigned int gc_seq, gfp_t gfp); +void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc); + +struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp); +void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans); + +void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv); + +struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, + unsigned int gc_seq); + +void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); @@ -1732,6 +1733,8 @@ struct nftables_pernet { struct mutex commit_mutex; u64 table_handle; unsigned int base_seq; + unsigned int gc_seq; + u8 validate_state; }; extern unsigned int nf_tables_net_id; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index db762e35aca9..f00374718159 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -65,6 +65,7 @@ struct netns_ipv4 { #endif bool fib_has_custom_local_routes; bool fib_offload_disabled; + u8 sysctl_tcp_shrink_window; #ifdef CONFIG_IP_ROUTE_CLASSID atomic_t fib_num_tclassid_users; #endif @@ -194,6 +195,7 @@ struct netns_ipv4 { int sysctl_udp_rmem_min; u8 sysctl_fib_notify_on_flag_change; + u8 sysctl_tcp_syn_linear_timeouts; #ifdef CONFIG_NET_L3_MASTER_DEV u8 sysctl_udp_l3mdev_accept; diff --git a/include/net/nsh.h b/include/net/nsh.h index 350b1ad11c7f..16a751093896 100644 --- a/include/net/nsh.h +++ b/include/net/nsh.h @@ -192,7 +192,7 @@ /** * struct nsh_md1_ctx - Keeps track of NSH context data - * @nshc<1-4>: NSH Contexts. + * @context: NSH Contexts. */ struct nsh_md1_ctx { __be32 context[4]; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 862f1719b523..cf5ecae4a2fc 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -109,4 +109,25 @@ void phonet_sysctl_exit(void); int isi_register(void); void isi_unregister(void); +static inline bool sk_is_phonet(struct sock *sk) +{ + return sk->sk_family == PF_PHONET; +} + +static inline int phonet_sk_ioctl(struct sock *sk, unsigned int cmd, + void __user *arg) +{ + int karg; + + switch (cmd) { + case SIOCPNADDRESOURCE: + case SIOCPNDELRESOURCE: + if (get_user(karg, (int __user *)arg)) + return -EFAULT; + + return sk->sk_prot->ioctl(sk, cmd, &karg); + } + /* A positive return value means that the ioctl was not processed */ + return 1; +} #endif diff --git a/include/net/pie.h b/include/net/pie.h index 3fe2361e03b4..01cbc66825a4 100644 --- a/include/net/pie.h +++ b/include/net/pie.h @@ -17,7 +17,7 @@ /** * struct pie_params - contains pie parameters * @target: target delay in pschedtime - * @tudpate: interval at which drop probability is calculated + * @tupdate: interval at which drop probability is calculated * @limit: total number of packets that can be in the queue * @alpha: parameter to control drop probability * @beta: parameter to control drop probability diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b3b5b0b62f16..a2ea45c7b53e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -868,6 +868,7 @@ struct tc_htb_qopt_offload { u16 qid; u64 rate; u64 ceil; + u8 prio; }; #define TC_HTB_CLASSID_ROOT U32_MAX diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 5722931d83d4..15960564e0c3 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -134,7 +134,7 @@ extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; */ static inline unsigned int psched_mtu(const struct net_device *dev) { - return dev->mtu + dev->hard_header_len; + return READ_ONCE(dev->mtu) + dev->hard_header_len; } static inline struct net *qdisc_net(struct Qdisc *q) @@ -187,6 +187,32 @@ struct tc_taprio_caps { bool broken_mqprio:1; }; +enum tc_taprio_qopt_cmd { + TAPRIO_CMD_REPLACE, + TAPRIO_CMD_DESTROY, + TAPRIO_CMD_STATS, + TAPRIO_CMD_QUEUE_STATS, +}; + +/** + * struct tc_taprio_qopt_stats - IEEE 802.1Qbv statistics + * @window_drops: Frames that were dropped because they were too large to be + * transmitted in any of the allotted time windows (open gates) for their + * traffic class. + * @tx_overruns: Frames still being transmitted by the MAC after the + * transmission gate associated with their traffic class has closed. + * Equivalent to `12.29.1.1.2 TransmissionOverrun` from 802.1Q-2018. + */ +struct tc_taprio_qopt_stats { + u64 window_drops; + u64 tx_overruns; +}; + +struct tc_taprio_qopt_queue_stats { + int queue; + struct tc_taprio_qopt_stats stats; +}; + struct tc_taprio_sched_entry { u8 command; /* TC_TAPRIO_CMD_* */ @@ -196,16 +222,26 @@ struct tc_taprio_sched_entry { }; struct tc_taprio_qopt_offload { - struct tc_mqprio_qopt_offload mqprio; - struct netlink_ext_ack *extack; - u8 enable; - ktime_t base_time; - u64 cycle_time; - u64 cycle_time_extension; - u32 max_sdu[TC_MAX_QUEUE]; - - size_t num_entries; - struct tc_taprio_sched_entry entries[]; + enum tc_taprio_qopt_cmd cmd; + + union { + /* TAPRIO_CMD_STATS */ + struct tc_taprio_qopt_stats stats; + /* TAPRIO_CMD_QUEUE_STATS */ + struct tc_taprio_qopt_queue_stats queue_stats; + /* TAPRIO_CMD_REPLACE */ + struct { + struct tc_mqprio_qopt_offload mqprio; + struct netlink_ext_ack *extack; + ktime_t base_time; + u64 cycle_time; + u64 cycle_time_extension; + u32 max_sdu[TC_MAX_QUEUE]; + + size_t num_entries; + struct tc_taprio_sched_entry entries[]; + }; + }; }; #if IS_ENABLED(CONFIG_NET_SCH_TAPRIO) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 896191f420d5..b2cb4a9eb04d 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -140,17 +140,6 @@ struct regulatory_request { * otherwise initiating radiation is not allowed. This will enable the * relaxations enabled under the CFG80211_REG_RELAX_NO_IR configuration * option - * @REGULATORY_IGNORE_STALE_KICKOFF: the regulatory core will _not_ make sure - * all interfaces on this wiphy reside on allowed channels. If this flag - * is not set, upon a regdomain change, the interfaces are given a grace - * period (currently 60 seconds) to disconnect or move to an allowed - * channel. Interfaces on forbidden channels are forcibly disconnected. - * Currently these types of interfaces are supported for enforcement: - * NL80211_IFTYPE_ADHOC, NL80211_IFTYPE_STATION, NL80211_IFTYPE_AP, - * NL80211_IFTYPE_AP_VLAN, NL80211_IFTYPE_MONITOR, - * NL80211_IFTYPE_P2P_CLIENT, NL80211_IFTYPE_P2P_GO, - * NL80211_IFTYPE_P2P_DEVICE. The flag will be set by default if a device - * includes any modes unsupported for enforcement checking. * @REGULATORY_WIPHY_SELF_MANAGED: for devices that employ wiphy-specific * regdom management. These devices will ignore all regdom changes not * originating from their own wiphy. @@ -177,7 +166,7 @@ enum ieee80211_regulatory_flags { REGULATORY_COUNTRY_IE_FOLLOW_POWER = BIT(3), REGULATORY_COUNTRY_IE_IGNORE = BIT(4), REGULATORY_ENABLE_RELAX_NO_IR = BIT(5), - REGULATORY_IGNORE_STALE_KICKOFF = BIT(6), + /* reuse bit 6 next time */ REGULATORY_WIPHY_SELF_MANAGED = BIT(7), }; diff --git a/include/net/route.h b/include/net/route.h index bcc367cf3aa2..8c2a8e7d8f8e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi __be16 dport, __be16 sport, __u8 proto, __u8 tos, int oif) { - flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, + flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); @@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk), + flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); } @@ -321,8 +321,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, if (IS_ERR(rt)) return rt; ip_rt_put(rt); - flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr, - fl4->saddr); + flowi4_update_output(fl4, oif, fl4->daddr, fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -337,8 +336,7 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable fl4->fl4_dport = dport; fl4->fl4_sport = sport; ip_rt_put(rt); - flowi4_update_output(fl4, sk->sk_bound_dev_if, - RT_CONN_FLAGS(sk), fl4->daddr, + flowi4_update_output(fl4, sk->sk_bound_dev_if, fl4->daddr, fl4->saddr); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); diff --git a/include/net/rpl.h b/include/net/rpl.h index 30fe780d1e7c..74734191c458 100644 --- a/include/net/rpl.h +++ b/include/net/rpl.h @@ -23,9 +23,6 @@ static inline int rpl_init(void) static inline void rpl_exit(void) {} #endif -size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri, - unsigned char cmpre); - void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr, const struct ipv6_rpl_sr_hdr *inhdr, const struct in6_addr *daddr, unsigned char n); diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h index 040f07b47f1f..b2283762e446 100644 --- a/include/net/rsi_91x.h +++ b/include/net/rsi_91x.h @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2017 Redpine Signals Inc. * * Permission to use, copy, modify, and/or distribute this software for any diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index d9076a7a430c..6506221c5fe3 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr * int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, u32 portid, const struct nlmsghdr *nlh); -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, - struct netlink_ext_ack *exterr); +int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, + struct netlink_ext_ack *exterr); struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 12eadecf8cd0..e92f73bb3198 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1190,20 +1190,6 @@ static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_DROP; } -/* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how - long it will take to send a packet given its size. - */ -static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) -{ - int slot = pktlen + rtab->rate.cell_align + rtab->rate.overhead; - if (slot < 0) - slot = 0; - slot >>= rtab->rate.cell_log; - if (slot > 255) - return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]; - return rtab->data[slot]; -} - struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; diff --git a/include/net/scm.h b/include/net/scm.h index 585adc1346bd..c5bcdf65f55c 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -120,15 +120,49 @@ static inline bool scm_has_secdata(struct socket *sock) } #endif /* CONFIG_SECURITY_NETWORK */ -static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg, - struct scm_cookie *scm, int flags) +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) +{ + struct file *pidfd_file = NULL; + int pidfd; + + /* + * put_cmsg() doesn't return an error if CMSG is truncated, + * that's why we need to opencode these checks here. + */ + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { + msg->msg_flags |= MSG_CTRUNC; + return; + } + + if (!scm->pid) + return; + + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); + + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { + if (pidfd_file) { + put_unused_fd(pidfd); + fput(pidfd_file); + } + + return; + } + + if (pidfd_file) + fd_install(pidfd, pidfd_file); +} + +static inline bool __scm_recv_common(struct socket *sock, struct msghdr *msg, + struct scm_cookie *scm, int flags) { if (!msg->msg_control) { - if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp || - scm_has_secdata(sock)) + if (test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags) || + scm->fp || scm_has_secdata(sock)) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); - return; + return false; } if (test_bit(SOCK_PASSCRED, &sock->flags)) { @@ -141,16 +175,34 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg, put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds); } - scm_destroy_cred(scm); - scm_passec(sock, msg, scm); - if (!scm->fp) + if (scm->fp) + scm_detach_fds(msg, scm); + + return true; +} + +static inline void scm_recv(struct socket *sock, struct msghdr *msg, + struct scm_cookie *scm, int flags) +{ + if (!__scm_recv_common(sock, msg, scm, flags)) return; - - scm_detach_fds(msg, scm); + + scm_destroy_cred(scm); } +static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg, + struct scm_cookie *scm, int flags) +{ + if (!__scm_recv_common(sock, msg, scm, flags)) + return; + + if (test_bit(SOCK_PASSPIDFD, &sock->flags)) + scm_pidfd_recv(msg, scm); + + scm_destroy_cred(scm); +} #endif /* __LINUX_NET_SCM_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 6f428a7f3567..690e22139543 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1258,7 +1258,7 @@ struct proto { bool kern); int (*ioctl)(struct sock *sk, int cmd, - unsigned long arg); + int *karg); int (*init)(struct sock *sk); void (*destroy)(struct sock *sk); void (*shutdown)(struct sock *sk, int how); @@ -1277,8 +1277,7 @@ struct proto { size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); - int (*sendpage)(struct sock *sk, struct page *page, - int offset, size_t size, int flags); + void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, struct sockaddr *addr, int addr_len); int (*bind_add)(struct sock *sk, @@ -1324,6 +1323,7 @@ struct proto { /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. + * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes. * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ @@ -1421,6 +1421,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk) return sk->sk_prot->memory_pressure != NULL; } +static inline bool sk_under_global_memory_pressure(const struct sock *sk) +{ + return sk->sk_prot->memory_pressure && + !!READ_ONCE(*sk->sk_prot->memory_pressure); +} + static inline bool sk_under_memory_pressure(const struct sock *sk) { if (!sk->sk_prot->memory_pressure) @@ -1430,7 +1436,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; - return !!*sk->sk_prot->memory_pressure; + return !!READ_ONCE(*sk->sk_prot->memory_pressure); } static inline long @@ -1507,7 +1513,7 @@ proto_memory_pressure(struct proto *prot) { if (!prot->memory_pressure) return false; - return !!*prot->memory_pressure; + return !!READ_ONCE(*prot->memory_pressure); } @@ -1918,10 +1924,6 @@ int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); -ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags); -ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags); /* * Functions to fill in entries in struct proto_ops when a protocol @@ -2100,6 +2102,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) } kuid_t sock_i_uid(struct sock *sk); +unsigned long __sock_i_ino(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) @@ -2973,6 +2976,9 @@ int sock_get_timeout(long timeo, void *optval, bool old_timeval); int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, sockptr_t optval, int optlen, bool old_timeval); +int sock_ioctl_inout(struct sock *sk, unsigned int cmd, + void __user *arg, void *karg, size_t size); +int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { if (sk->sk_prot->sock_is_readable) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5066e4586cf0..0ca972ebd3dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -45,6 +45,7 @@ #include <linux/memcontrol.h> #include <linux/bpf-cgroup.h> #include <linux/siphash.h> +#include <linux/net_mm.h> extern struct inet_hashinfo tcp_hashinfo; @@ -161,8 +162,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_KEEPCNT 127 #define MAX_TCP_SYNCNT 127 -#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */ - #define TCP_PAWS_24DAYS (60 * 60 * 24 * 24) #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated * after this time. It should be equal @@ -329,20 +328,16 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size, struct ubuf_info *uarg); -int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, - int flags); -int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags); -ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, - size_t size, int flags); +void tcp_splice_eof(struct socket *sock); int tcp_send_mss(struct sock *sk, int *size_goal, int flags); +int tcp_wmem_schedule(struct sock *sk, int copy); void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, int size_goal); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); -int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int tcp_ioctl(struct sock *sk, int cmd, int *karg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); @@ -352,7 +347,7 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, +struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, bool force_schedule); void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks); @@ -1514,25 +1509,38 @@ void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; + + /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl() + * and do_tcp_setsockopt(). + */ + val = READ_ONCE(tp->keepalive_intvl); - return tp->keepalive_intvl ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; + + /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */ + val = READ_ONCE(tp->keepalive_time); - return tp->keepalive_time ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; - return tp->keepalive_probes ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); + /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt() + * and do_tcp_setsockopt(). + */ + val = READ_ONCE(tp->keepalive_probes); + + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) @@ -2046,14 +2054,18 @@ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)) INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); -int tcp_gro_complete(struct sk_buff *skb); +void tcp_gro_complete(struct sk_buff *skb); void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); + u32 val; + + val = READ_ONCE(tp->notsent_lowat); + + return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); diff --git a/include/net/tls.h b/include/net/tls.h index 596595c4b1af..5e71dd3df8ca 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -259,7 +259,7 @@ struct tls_context { struct scatterlist *partially_sent_record; u16 partially_sent_offset; - bool in_tcp_sendpages; + bool splicing_pages; bool pending_open_record_frags; struct mutex tx_lock; /* protects partially_sent_* fields and @@ -370,10 +370,12 @@ struct sk_buff * tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev, struct sk_buff *skb); -static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) +static inline bool tls_is_skb_tx_device_offloaded(const struct sk_buff *skb) { -#ifdef CONFIG_SOCK_VALIDATE_XMIT - return sk_fullsock(sk) && +#ifdef CONFIG_TLS_DEVICE + struct sock *sk = skb->sk; + + return sk && sk_fullsock(sk) && (smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb); #else diff --git a/include/net/udp.h b/include/net/udp.h index de4b528522bb..4d13424f8f72 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -21,6 +21,7 @@ #include <linux/list.h> #include <linux/bug.h> #include <net/inet_sock.h> +#include <net/gso.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ip.h> @@ -278,12 +279,13 @@ int udp_get_port(struct sock *sk, unsigned short snum, int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +void udp_splice_eof(struct socket *sock); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); -int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int udp_ioctl(struct sock *sk, int cmd, int *karg); int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); @@ -437,7 +439,6 @@ struct udp_seq_afinfo { struct udp_iter_state { struct seq_net_private p; int bucket; - struct udp_seq_afinfo *bpf_seq_afinfo; }; void *udp_seq_start(struct seq_file *seq, loff_t *pos); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 20bd7d893e10..6a9f8a5f387c 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -328,6 +328,7 @@ struct vxlan_dev { #define VXLAN_F_TTL_INHERIT 0x10000 #define VXLAN_F_VNIFILTER 0x20000 #define VXLAN_F_MDB 0x40000 +#define VXLAN_F_LOCALBYPASS 0x80000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -348,7 +349,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_COLLECT_METADATA | \ - VXLAN_F_VNIFILTER) + VXLAN_F_VNIFILTER | \ + VXLAN_F_LOCALBYPASS) struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); @@ -384,10 +386,15 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, return features; } -/* IP header + UDP + VXLAN + Ethernet header */ -#define VXLAN_HEADROOM (20 + 8 + 8 + 14) -/* IPv6 header + UDP + VXLAN + Ethernet header */ -#define VXLAN6_HEADROOM (40 + 8 + 8 + 14) +static inline int vxlan_headroom(u32 flags) +{ + /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */ + /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */ + return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + + sizeof(struct udphdr) + sizeof(struct vxlanhdr) + + (flags & VXLAN_F_GPE ? 0 : ETH_HLEN); +} static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) { @@ -549,12 +556,12 @@ static inline void vxlan_flag_attr_error(int attrtype, } static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh, - int hash, + u32 hash, struct vxlan_rdst *rdst) { struct fib_nh_common *nhc; - nhc = nexthop_path_fdb_result(nh, hash); + nhc = nexthop_path_fdb_result(nh, hash >> 1); if (unlikely(!nhc)) return false; diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 9c0d860609ba..c243f906ebed 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -255,10 +255,6 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } -static inline void xsk_buff_discard(struct xdp_buff *xdp) -{ -} - static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 151ca95dd08d..363c7d510554 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1984,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; + xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; netdev_put(dev, &xso->dev_tracker); } } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index d318c769b445..a8d7b8a3688a 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -180,7 +180,7 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, if (likely(!cross_pg)) return false; - return pool->dma_pages_cnt && + return pool->dma_pages && !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK); } |
