diff options
Diffstat (limited to 'include/net')
42 files changed, 873 insertions, 272 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index 55dab604861f..2bf3092ae7ec 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -166,6 +166,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); +void tcf_idr_insert_many(struct tc_action *actions[]); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); @@ -186,10 +187,13 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, struct tc_action *actions[], size_t *attr_size, bool rtnl_held, struct netlink_ext_ack *extack); +struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + bool rtnl_held, + struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - bool rtnl_held, + struct tc_action_ops *ops, bool rtnl_held, struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c1504aa3d9cf..ba2f439bc04d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -238,6 +238,14 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, + + /* + * When this quirk is set, then the hci_suspend_notifier is not + * registered. This is intended for devices which drop completely + * from the bus on system-suspend and which will show up as a new + * HCI after resume. + */ + HCI_QUIRK_NO_SUSPEND_NOTIFIER, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 677a8c50b2ad..ebdd4afe30d2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -105,6 +105,8 @@ enum suspend_tasks { SUSPEND_POWERING_DOWN, SUSPEND_PREPARE_NOTIFIER, + + SUSPEND_SET_ADV_FILTER, __SUSPEND_NUM_TASKS }; @@ -250,15 +252,31 @@ struct adv_pattern { __u8 value[HCI_MAX_AD_LENGTH]; }; +struct adv_rssi_thresholds { + __s8 low_threshold; + __s8 high_threshold; + __u16 low_threshold_timeout; + __u16 high_threshold_timeout; + __u8 sampling_period; +}; + struct adv_monitor { struct list_head patterns; - bool active; + struct adv_rssi_thresholds rssi; __u16 handle; + + enum { + ADV_MONITOR_STATE_NOT_REGISTERED, + ADV_MONITOR_STATE_REGISTERED, + ADV_MONITOR_STATE_OFFLOADED + } state; }; #define HCI_MIN_ADV_MONITOR_HANDLE 1 -#define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32 +#define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32 #define HCI_MAX_ADV_MONITOR_NUM_PATTERNS 16 +#define HCI_ADV_MONITOR_EXT_NONE 1 +#define HCI_ADV_MONITOR_EXT_MSFT 2 #define HCI_MAX_SHORT_NAME_LENGTH 10 @@ -1316,10 +1334,15 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); void hci_adv_monitors_clear(struct hci_dev *hdev); -void hci_free_adv_monitor(struct adv_monitor *monitor); -int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); -int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle); +void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); +int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); +int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); +bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, + int *err); +bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err); +bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err); bool hci_is_adv_monitoring(struct hci_dev *hdev); +int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); @@ -1342,6 +1365,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE) #define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR) #define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC) +#define lmp_esco_2m_capable(dev) ((dev)->features[0][5] & LMP_EDR_ESCO_2M) #define lmp_ext_inq_capable(dev) ((dev)->features[0][6] & LMP_EXT_INQ) #define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR)) #define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR) @@ -1794,7 +1818,10 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); +void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); +int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); +int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 1d1232917de7..61800a7b6192 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -207,6 +207,7 @@ struct l2cap_hdr { __le16 len; __le16 cid; } __packed; +#define L2CAP_LEN_SIZE 2 #define L2CAP_HDR_SIZE 4 #define L2CAP_ENH_HDR_SIZE 6 #define L2CAP_EXT_HDR_SIZE 8 diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index f9a6638e20b3..839a2028009e 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -821,6 +821,22 @@ struct mgmt_rp_add_ext_adv_data { __u8 instance; } __packed; +struct mgmt_adv_rssi_thresholds { + __s8 high_threshold; + __le16 high_threshold_timeout; + __s8 low_threshold; + __le16 low_threshold_timeout; + __u8 sampling_period; +} __packed; + +#define MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI 0x0056 +struct mgmt_cp_add_adv_patterns_monitor_rssi { + struct mgmt_adv_rssi_thresholds rssi; + __u8 pattern_count; + struct mgmt_adv_pattern patterns[]; +} __packed; +#define MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE 8 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/include/net/bonding.h b/include/net/bonding.h index adc3da776970..019e998d944a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -89,6 +89,8 @@ #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) +#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX | NETIF_F_HW_TLS_RX) + #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; @@ -265,6 +267,8 @@ struct bond_vlan_tag { unsigned short vlan_id; }; +bool bond_sk_check(struct bonding *bond); + /** * Returns NULL if the net_device does not belong to any of the bond's slaves * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9a4bbccddc7f..911fae42b0c0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1460,6 +1460,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_DMG: 60GHz MCS * @RATE_INFO_FLAGS_HE_MCS: HE MCS information * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode + * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1468,6 +1469,7 @@ enum rate_info_flags { RATE_INFO_FLAGS_DMG = BIT(3), RATE_INFO_FLAGS_HE_MCS = BIT(4), RATE_INFO_FLAGS_EDMG = BIT(5), + RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), }; /** @@ -1756,7 +1758,7 @@ struct cfg80211_sar_specs { /** - * @struct cfg80211_sar_chan_ranges - sar frequency ranges + * struct cfg80211_sar_freq_ranges - sar frequency ranges * @start_freq: start range edge frequency * @end_freq: end range edge frequency */ @@ -2581,12 +2583,14 @@ struct cfg80211_auth_request { * authentication capability. Drivers can offload authentication to * userspace if this flag is set. Only applicable for cfg80211_connect() * request (connect callback). + * @ASSOC_REQ_DISABLE_HE: Disable HE */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), ASSOC_REQ_DISABLE_VHT = BIT(1), ASSOC_REQ_USE_RRM = BIT(2), CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), + ASSOC_REQ_DISABLE_HE = BIT(4), }; /** @@ -3630,9 +3634,10 @@ struct mgmt_frame_regs { * All callbacks except where otherwise noted should return 0 * on success or a negative error code. * - * All operations are currently invoked under rtnl for consistency with the - * wireless extensions but this is subject to reevaluation as soon as this - * code is used more widely and we have a first user without wext. + * All operations are invoked with the wiphy mutex held. The RTNL may be + * held in addition (due to wireless extensions) but this cannot be relied + * upon except in cases where documented below. Note that due to ordering, + * the RTNL also cannot be acquired in any handlers. * * @suspend: wiphy device needs to be suspended. The variable @wow will * be %NULL or contain the enabled Wake-on-Wireless triggers that are @@ -3647,11 +3652,14 @@ struct mgmt_frame_regs { * the new netdev in the wiphy's network namespace! Returns the struct * wireless_dev, or an ERR_PTR. For P2P device wdevs, the driver must * also set the address member in the wdev. + * This additionally holds the RTNL to be able to do netdev changes. * * @del_virtual_intf: remove the virtual interface + * This additionally holds the RTNL to be able to do netdev changes. * * @change_virtual_intf: change type/configuration of virtual interface, * keep the struct wireless_dev's iftype updated. + * This additionally holds the RTNL to be able to do netdev changes. * * @add_key: add a key with the given parameters. @mac_addr will be %NULL * when adding a group key. @@ -3972,6 +3980,8 @@ struct mgmt_frame_regs { * This callback may sleep. * @reset_tid_config: Reset TID specific configuration for the peer, for the * given TIDs. This callback may sleep. + * + * @set_sar_specs: Update the SAR (TX power) settings. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4739,6 +4749,7 @@ struct wiphy_iftype_akm_suites { /** * struct wiphy - wireless hardware description + * @mtx: mutex for the data (structures) of this device * @reg_notifier: the driver's regulatory notification callback, * note that if your driver uses wiphy_apply_custom_regulatory() * the reg_notifier's request can be passed as NULL @@ -4929,8 +4940,11 @@ struct wiphy_iftype_akm_suites { * @max_data_retry_count: maximum supported per TID retry count for * configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes + * @sar_capa: SAR control capabilities */ struct wiphy { + struct mutex mtx; + /* assign these fields before you register the wiphy */ u8 perm_addr[ETH_ALEN]; @@ -5183,6 +5197,37 @@ static inline struct wiphy *wiphy_new(const struct cfg80211_ops *ops, */ int wiphy_register(struct wiphy *wiphy); +/* this is a define for better error reporting (file/line) */ +#define lockdep_assert_wiphy(wiphy) lockdep_assert_held(&(wiphy)->mtx) + +/** + * rcu_dereference_wiphy - rcu_dereference with debug checking + * @wiphy: the wiphy to check the locking on + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or RTNL. Note: Please prefer wiphy_dereference() or rcu_dereference(). + */ +#define rcu_dereference_wiphy(wiphy, p) \ + rcu_dereference_check(p, lockdep_is_held(&wiphy->mtx)) + +/** + * wiphy_dereference - fetch RCU pointer when updates are prevented by wiphy mtx + * @wiphy: the wiphy to check the locking on + * @p: The pointer to read, prior to dereferencing + * + * Return the value of the specified RCU-protected pointer, but omit the + * READ_ONCE(), because caller holds the wiphy mutex used for updates. + */ +#define wiphy_dereference(wiphy, p) \ + rcu_dereference_protected(p, lockdep_is_held(&wiphy->mtx)) + +/** + * get_wiphy_regdom - get custom regdomain for the given wiphy + * @wiphy: the wiphy to get the regdomain from + */ +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); + /** * wiphy_unregister - deregister a wiphy from cfg80211 * @@ -5208,13 +5253,45 @@ struct cfg80211_cached_keys; struct cfg80211_cqm_config; /** + * wiphy_lock - lock the wiphy + * @wiphy: the wiphy to lock + * + * This is mostly exposed so it can be done around registering and + * unregistering netdevs that aren't created through cfg80211 calls, + * since that requires locking in cfg80211 when the notifiers is + * called, but that cannot differentiate which way it's called. + * + * When cfg80211 ops are called, the wiphy is already locked. + */ +static inline void wiphy_lock(struct wiphy *wiphy) + __acquires(&wiphy->mtx) +{ + mutex_lock(&wiphy->mtx); + __acquire(&wiphy->mtx); +} + +/** + * wiphy_unlock - unlock the wiphy again + * @wiphy: the wiphy to unlock + */ +static inline void wiphy_unlock(struct wiphy *wiphy) + __releases(&wiphy->mtx) +{ + __release(&wiphy->mtx); + mutex_unlock(&wiphy->mtx); +} + +/** * struct wireless_dev - wireless device state * * For netdevs, this structure must be allocated by the driver * that uses the ieee80211_ptr field in struct net_device (this * is intentional so it can be allocated along with the netdev.) * It need not be registered then as netdev registration will - * be intercepted by cfg80211 to see the new wireless device. + * be intercepted by cfg80211 to see the new wireless device, + * however, drivers must lock the wiphy before registering or + * unregistering netdevs if they pre-create any netdevs (in ops + * called from cfg80211, the wiphy is already locked.) * * For non-netdev uses, it must also be allocated by the driver * in response to the cfg80211 callbacks that require it, as @@ -5223,6 +5300,9 @@ struct cfg80211_cqm_config; * * @wiphy: pointer to hardware description * @iftype: interface type + * @registered: is this wdev already registered with cfg80211 + * @registering: indicates we're doing registration under wiphy lock + * for the notifier * @list: (private) Used to collect the interfaces * @netdev: (private) Used to reference back to the netdev, may be %NULL * @identifier: (private) Identifier used in nl80211 to identify this @@ -5306,7 +5386,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, is_running; + bool use_4addr, is_running, registered, registering; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -5975,18 +6055,18 @@ int regulatory_set_wiphy_regd(struct wiphy *wiphy, struct ieee80211_regdomain *rd); /** - * regulatory_set_wiphy_regd_sync_rtnl - set regdom for self-managed drivers + * regulatory_set_wiphy_regd_sync - set regdom for self-managed drivers * @wiphy: the wireless device we want to process the regulatory domain on * @rd: the regulatory domain information to use for this wiphy * - * This functions requires the RTNL to be held and applies the new regdomain - * synchronously to this wiphy. For more details see - * regulatory_set_wiphy_regd(). + * This functions requires the RTNL and the wiphy mutex to be held and + * applies the new regdomain synchronously to this wiphy. For more details + * see regulatory_set_wiphy_regd(). * * Return: 0 on success. -EINVAL, -EPERM */ -int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, - struct ieee80211_regdomain *rd); +int regulatory_set_wiphy_regd_sync(struct wiphy *wiphy, + struct ieee80211_regdomain *rd); /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain @@ -6104,7 +6184,7 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid); void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid); /** - * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped + * cfg80211_sched_scan_stopped_locked - notify that the scheduled scan has stopped * * @wiphy: the wiphy on which the scheduled scan stopped * @reqid: identifier for the related scheduled scan request @@ -6112,9 +6192,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid); * The driver can call this function to inform cfg80211 that the * scheduled scan had to be stopped, for whatever reason. The driver * is then called back via the sched_scan_stop operation when done. - * This function should be called with rtnl locked. + * This function should be called with the wiphy mutex held. */ -void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid); +void cfg80211_sched_scan_stopped_locked(struct wiphy *wiphy, u64 reqid); /** * cfg80211_inform_bss_frame_data - inform cfg80211 of a received BSS frame @@ -7551,7 +7631,7 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * also checks if IR-relaxation conditions apply, to allow beaconing under * more permissive conditions. * - * Requires the RTNL to be held. + * Requires the wiphy mutex to be held. */ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, @@ -7649,19 +7729,46 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); * cfg80211_unregister_wdev - remove the given wdev * @wdev: struct wireless_dev to remove * - * Call this function only for wdevs that have no netdev assigned, - * e.g. P2P Devices. It removes the device from the list so that - * it can no longer be used. It is necessary to call this function - * even when cfg80211 requests the removal of the interface by - * calling the del_virtual_intf() callback. The function must also - * be called when the driver wishes to unregister the wdev, e.g. - * when the device is unbound from the driver. + * This function removes the device so it can no longer be used. It is necessary + * to call this function even when cfg80211 requests the removal of the device + * by calling the del_virtual_intf() callback. The function must also be called + * when the driver wishes to unregister the wdev, e.g. when the hardware device + * is unbound from the driver. * - * Requires the RTNL to be held. + * Requires the RTNL and wiphy mutex to be held. */ void cfg80211_unregister_wdev(struct wireless_dev *wdev); /** + * cfg80211_register_netdevice - register the given netdev + * @dev: the netdev to register + * + * Note: In contexts coming from cfg80211 callbacks, you must call this rather + * than register_netdevice(), unregister_netdev() is impossible as the RTNL is + * held. Otherwise, both register_netdevice() and register_netdev() are usable + * instead as well. + * + * Requires the RTNL and wiphy mutex to be held. + */ +int cfg80211_register_netdevice(struct net_device *dev); + +/** + * cfg80211_unregister_netdevice - unregister the given netdev + * @dev: the netdev to register + * + * Note: In contexts coming from cfg80211 callbacks, you must call this rather + * than unregister_netdevice(), unregister_netdev() is impossible as the RTNL + * is held. Otherwise, both unregister_netdevice() and unregister_netdev() are + * usable instead as well. + * + * Requires the RTNL and wiphy mutex to be held. + */ +static inline void cfg80211_unregister_netdevice(struct net_device *dev) +{ + cfg80211_unregister_wdev(dev->ieee80211_ptr); +} + +/** * struct cfg80211_ft_event_params - FT Information Elements * @ies: FT IEs * @ies_len: length of the FT IE in bytes diff --git a/include/net/devlink.h b/include/net/devlink.h index f466819cc477..853420db5d32 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -94,6 +94,18 @@ struct devlink_port_pci_vf_attrs { }; /** + * struct devlink_port_pci_sf_attrs - devlink port's PCI SF attributes + * @controller: Associated controller number + * @sf: Associated PCI SF for of the PCI PF for this port. + * @pf: Associated PCI PF number for this port. + */ +struct devlink_port_pci_sf_attrs { + u32 controller; + u32 sf; + u16 pf; +}; + +/** * struct devlink_port_attrs - devlink port object * @flavour: flavour of the port * @split: indicates if this is split port @@ -103,6 +115,7 @@ struct devlink_port_pci_vf_attrs { * @phys: physical port attributes * @pci_pf: PCI PF port attributes * @pci_vf: PCI VF port attributes + * @pci_sf: PCI SF port attributes */ struct devlink_port_attrs { u8 split:1, @@ -114,6 +127,7 @@ struct devlink_port_attrs { struct devlink_port_phys_attrs phys; struct devlink_port_pci_pf_attrs pci_pf; struct devlink_port_pci_vf_attrs pci_vf; + struct devlink_port_pci_sf_attrs pci_sf; }; }; @@ -138,6 +152,17 @@ struct devlink_port { struct mutex reporters_lock; /* Protects reporter_list */ }; +struct devlink_port_new_attrs { + enum devlink_port_flavour flavour; + unsigned int port_index; + u32 controller; + u32 sfnum; + u16 pfnum; + u8 port_index_valid:1, + controller_valid:1, + sfnum_valid:1; +}; + struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; @@ -380,6 +405,8 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 +#define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" + #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, @@ -836,6 +863,7 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_GTP_PARSING, DEVLINK_TRAP_GENERIC_ID_ESP_PARSING, DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP, + DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -1061,6 +1089,8 @@ enum devlink_trap_group_generic_id { "esp_parsing" #define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_NEXTHOP \ "blackhole_nexthop" +#define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \ + "dmac_filter" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" @@ -1348,6 +1378,79 @@ struct devlink_ops { int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); + /** + * port_new() - Add a new port function of a specified flavor + * @devlink: Devlink instance + * @attrs: attributes of the new port + * @extack: extack for reporting error messages + * @new_port_index: index of the new port + * + * Devlink core will call this device driver function upon user request + * to create a new port function of a specified flavor and optional + * attributes + * + * Notes: + * - Called without devlink instance lock being held. Drivers must + * implement own means of synchronization + * - On success, drivers must register a port with devlink core + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_new)(struct devlink *devlink, + const struct devlink_port_new_attrs *attrs, + struct netlink_ext_ack *extack, + unsigned int *new_port_index); + /** + * port_del() - Delete a port function + * @devlink: Devlink instance + * @port_index: port function index to delete + * @extack: extack for reporting error messages + * + * Devlink core will call this device driver function upon user request + * to delete a previously created port function + * + * Notes: + * - Called without devlink instance lock being held. Drivers must + * implement own means of synchronization + * - On success, drivers must unregister the corresponding devlink + * port + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_del)(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack); + /** + * port_fn_state_get() - Get the state of a port function + * @devlink: Devlink instance + * @port: The devlink port + * @state: Admin configured state + * @opstate: Current operational state + * @extack: extack for reporting error messages + * + * Reports the admin and operational state of a devlink port function + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_fn_state_get)(struct devlink *devlink, + struct devlink_port *port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack); + /** + * port_fn_state_set() - Set the admin state of a port function + * @devlink: Devlink instance + * @port: The devlink port + * @state: Admin state + * @extack: extack for reporting error messages + * + * Set the admin state of a devlink port function + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_fn_state_set)(struct devlink *devlink, + struct devlink_port *port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) @@ -1404,6 +1507,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro u16 pf, bool external); void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u16 vf, bool external); +void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, + u32 controller, u16 pf, u32 sf); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/net/dsa.h b/include/net/dsa.h index 4e60d2610f20..83a933e563fe 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -46,6 +46,9 @@ struct phylink_link_state; #define DSA_TAG_PROTO_AR9331_VALUE 16 #define DSA_TAG_PROTO_RTL4_A_VALUE 17 #define DSA_TAG_PROTO_HELLCREEK_VALUE 18 +#define DSA_TAG_PROTO_XRS700X_VALUE 19 +#define DSA_TAG_PROTO_OCELOT_8021Q_VALUE 20 +#define DSA_TAG_PROTO_SEVILLE_VALUE 21 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -67,6 +70,9 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE, + DSA_TAG_PROTO_XRS700X = DSA_TAG_PROTO_XRS700X_VALUE, + DSA_TAG_PROTO_OCELOT_8021Q = DSA_TAG_PROTO_OCELOT_8021Q_VALUE, + DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, }; struct packet_type; @@ -138,6 +144,9 @@ struct dsa_switch_tree { /* Has this tree been applied to the hardware? */ bool setup; + /* Tagging protocol operations */ + const struct dsa_device_ops *tag_ops; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. @@ -149,8 +158,45 @@ struct dsa_switch_tree { /* List of DSA links composing the routing table */ struct list_head rtable; + + /* Maps offloaded LAG netdevs to a zero-based linear ID for + * drivers that need it. + */ + struct net_device **lags; + unsigned int lags_len; }; +#define dsa_lags_foreach_id(_id, _dst) \ + for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \ + if ((_dst)->lags[(_id)]) + +#define dsa_lag_foreach_port(_dp, _dst, _lag) \ + list_for_each_entry((_dp), &(_dst)->ports, list) \ + if ((_dp)->lag_dev == (_lag)) + +#define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ + list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) + +static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, + unsigned int id) +{ + return dst->lags[id]; +} + +static inline int dsa_lag_id(struct dsa_switch_tree *dst, + struct net_device *lag) +{ + unsigned int id; + + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_dev(dst, id) == lag) + return id; + } + + return -ENODEV; +} + /* TC matchall action types */ enum dsa_port_mall_action_type { DSA_PORT_MALL_MIRROR, @@ -190,7 +236,9 @@ struct dsa_port { struct net_device *slave; }; - /* CPU port tagging operations used by master or slave devices */ + /* Copy of the tagging protocol operations, for quicker access + * in the data path. Valid only for the CPU ports. + */ const struct dsa_device_ops *tag_ops; /* Copies for faster access in master receive hot path */ @@ -220,6 +268,9 @@ struct dsa_port { bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; + struct net_device *lag_dev; + bool lag_tx_enabled; + struct net_device *hsr_dev; struct list_head list; @@ -319,6 +370,11 @@ struct dsa_switch { */ bool untag_bridge_pvid; + /* Let DSA manage the FDB entries towards the CPU, based on the + * software bridge database. + */ + bool assisted_learning_on_cpu_port; + /* In case vlan_filtering_is_global is set, the VLAN awareness state * should be retrieved from here and not from the per-port settings. */ @@ -335,6 +391,14 @@ struct dsa_switch { */ bool mtu_enforcement_ingress; + /* Drivers that benefit from having an ID associated with each + * offloaded LAG should set this to the maximum number of + * supported IDs. DSA will then maintain a mapping of _at + * least_ these many IDs, accessible to drivers via + * dsa_lag_id(). + */ + unsigned int num_lag_ids; + size_t num_ports; }; @@ -430,9 +494,18 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { + /* + * Tagging protocol helpers called for the CPU ports and DSA links. + * @get_tag_protocol retrieves the initial tagging protocol and is + * mandatory. Switches which can operate using multiple tagging + * protocols should implement @change_tag_protocol and report in + * @get_tag_protocol the tagger in current use. + */ enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); + int (*change_tag_protocol)(struct dsa_switch *ds, int port, + enum dsa_tag_protocol proto); int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); @@ -477,7 +550,7 @@ struct dsa_switch_ops { void (*phylink_fixed_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); /* - * ethtool hardware statistics. + * Port statistics counters. */ void (*get_strings)(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data); @@ -486,6 +559,8 @@ struct dsa_switch_ops { int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); void (*get_ethtool_phy_stats)(struct dsa_switch *ds, int port, uint64_t *data); + void (*get_stats64)(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s); /* * ethtool Wake-on-LAN @@ -553,19 +628,24 @@ struct dsa_switch_ops { void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); - int (*port_egress_floods)(struct dsa_switch *ds, int port, - bool unicast, bool multicast); + int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); + int (*port_bridge_flags)(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); + int (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter, + struct netlink_ext_ack *extack); /* * VLAN support */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering, - struct switchdev_trans *trans); - int (*port_vlan_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); - void (*port_vlan_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); + struct netlink_ext_ack *extack); + int (*port_vlan_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); /* @@ -581,10 +661,8 @@ struct dsa_switch_ops { /* * Multicast database */ - int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + int (*port_mdb_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); - void (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); /* @@ -624,6 +702,13 @@ struct dsa_switch_ops { void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct net_device *br); + int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, + int port); + int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag, + struct netdev_lag_upper_info *info); + int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag); /* * PTP functionality @@ -645,6 +730,40 @@ struct dsa_switch_ops { int (*devlink_info_get)(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack); + int (*devlink_sb_pool_get)(struct dsa_switch *ds, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*devlink_sb_pool_set)(struct dsa_switch *ds, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack); + int (*devlink_sb_port_pool_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*devlink_sb_port_pool_set)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 threshold, + struct netlink_ext_ack *extack); + int (*devlink_sb_tc_pool_bind_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*devlink_sb_tc_pool_bind_set)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack); + int (*devlink_sb_occ_snapshot)(struct dsa_switch *ds, + unsigned int sb_index); + int (*devlink_sb_occ_max_clear)(struct dsa_switch *ds, + unsigned int sb_index); + int (*devlink_sb_occ_port_pool_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*devlink_sb_occ_tc_port_bind_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); /* * MTU change functionality. Switches can also adjust their MRU through @@ -655,6 +774,36 @@ struct dsa_switch_ops { int (*port_change_mtu)(struct dsa_switch *ds, int port, int new_mtu); int (*port_max_mtu)(struct dsa_switch *ds, int port); + + /* + * LAG integration + */ + int (*port_lag_change)(struct dsa_switch *ds, int port); + int (*port_lag_join)(struct dsa_switch *ds, int port, + struct net_device *lag, + struct netdev_lag_upper_info *info); + int (*port_lag_leave)(struct dsa_switch *ds, int port, + struct net_device *lag); + + /* + * HSR integration + */ + int (*port_hsr_join)(struct dsa_switch *ds, int port, + struct net_device *hsr); + int (*port_hsr_leave)(struct dsa_switch *ds, int port, + struct net_device *hsr); + + /* + * MRP integration + */ + int (*port_mrp_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_mrp *mrp); + int (*port_mrp_del)(struct dsa_switch *ds, int port, + const struct switchdev_obj_mrp *mrp); + int (*port_mrp_add_ring_role)(struct dsa_switch *ds, int port, + const struct switchdev_obj_ring_role_mrp *mrp); + int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port, + const struct switchdev_obj_ring_role_mrp *mrp); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ @@ -828,57 +977,15 @@ static inline int dsa_switch_resume(struct dsa_switch *ds) } #endif /* CONFIG_PM_SLEEP */ -enum dsa_notifier_type { - DSA_PORT_REGISTER, - DSA_PORT_UNREGISTER, -}; - -struct dsa_notifier_info { - struct net_device *dev; -}; - -struct dsa_notifier_register_info { - struct dsa_notifier_info info; /* must be first */ - struct net_device *master; - unsigned int port_number; - unsigned int switch_number; -}; - -static inline struct net_device * -dsa_notifier_info_to_dev(const struct dsa_notifier_info *info) -{ - return info->dev; -} - #if IS_ENABLED(CONFIG_NET_DSA) -int register_dsa_notifier(struct notifier_block *nb); -int unregister_dsa_notifier(struct notifier_block *nb); -int call_dsa_notifiers(unsigned long val, struct net_device *dev, - struct dsa_notifier_info *info); +bool dsa_slave_dev_check(const struct net_device *dev); #else -static inline int register_dsa_notifier(struct notifier_block *nb) +static inline bool dsa_slave_dev_check(const struct net_device *dev) { - return 0; -} - -static inline int unregister_dsa_notifier(struct notifier_block *nb) -{ - return 0; -} - -static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, - struct dsa_notifier_info *info) -{ - return NOTIFY_DONE; + return false; } #endif -/* Broadcom tag specific helpers to insert and extract queue/port number */ -#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | q) -#define BRCM_TAG_GET_PORT(v) ((v) >> 8) -#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) - - netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); diff --git a/include/net/dst.h b/include/net/dst.h index 10f0a8399867..26f134ad3a25 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -18,6 +18,7 @@ #include <linux/refcount.h> #include <net/neighbour.h> #include <asm/processor.h> +#include <linux/indirect_call_wrapper.h> struct sk_buff; @@ -193,9 +194,11 @@ dst_feature(const struct dst_entry *dst, u32 feature) return dst_metric(dst, RTAX_FEATURES) & feature; } +INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *)); +INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *)); static inline u32 dst_mtu(const struct dst_entry *dst) { - return dst->ops->mtu(dst); + return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); } /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ @@ -435,22 +438,36 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) dst->expires = expires; } +INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, + struct sk_buff *)); /* Output packet to network from transport. */ static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return skb_dst(skb)->output(net, sk, skb); + return INDIRECT_CALL_INET(skb_dst(skb)->output, + ip6_output, ip_output, + net, sk, skb); } +INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *)); /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { - return skb_dst(skb)->input(skb); + return INDIRECT_CALL_INET(skb_dst(skb)->input, + ip6_input, ip_local_deliver, skb); } +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, + u32)); +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, + u32)); static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) { if (dst->obsolete) - dst = dst->ops->check(dst, cookie); + dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, + ipv4_dst_check, dst, cookie); return dst; } diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 123b1e9ea304..e6bd8ebf9ac3 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -245,6 +245,7 @@ struct flow_action_entry { unsigned long cookie; u32 mark; u32 labels[4]; + bool orig_dir; } ct_metadata; struct { /* FLOW_ACTION_MPLS_PUSH */ u32 label; diff --git a/include/net/fq.h b/include/net/fq.h index e39f3f8d5f8a..2eccbbd2b559 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -19,8 +19,6 @@ struct fq_tin; * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ * (deficit round robin) based round robin queuing similar to the one * found in net/sched/sch_fq_codel.c - * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of - * fat flows and efficient head-dropping if packet limit is reached * @queue: sk_buff queue to hold packets * @backlog: number of bytes pending in the queue. The number of packets can be * found in @queue.qlen @@ -29,7 +27,6 @@ struct fq_tin; struct fq_flow { struct fq_tin *tin; struct list_head flowchain; - struct list_head backlogchain; struct sk_buff_head queue; u32 backlog; int deficit; @@ -47,6 +44,8 @@ struct fq_flow { struct fq_tin { struct list_head new_flows; struct list_head old_flows; + struct list_head tin_list; + struct fq_flow default_flow; u32 backlog_bytes; u32 backlog_packets; u32 overlimit; @@ -59,14 +58,14 @@ struct fq_tin { /** * struct fq - main container for fair queuing purposes * - * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient - * head-dropping when @backlog reaches @limit * @limit: max number of packets that can be queued across all flows * @backlog: number of packets queued across all flows */ struct fq { struct fq_flow *flows; - struct list_head backlogs; + unsigned long *flows_bitmap; + + struct list_head tin_backlog; spinlock_t lock; u32 flows_cnt; u32 limit; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index e73d74d2fabf..a5f67a2c0c73 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -11,35 +11,37 @@ /* functions that are embedded into includer */ -static void fq_adjust_removal(struct fq *fq, - struct fq_flow *flow, - struct sk_buff *skb) + +static void +__fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets, + unsigned int bytes, unsigned int truesize) { struct fq_tin *tin = flow->tin; + int idx; - tin->backlog_bytes -= skb->len; - tin->backlog_packets--; - flow->backlog -= skb->len; - fq->backlog--; - fq->memory_usage -= skb->truesize; -} + tin->backlog_bytes -= bytes; + tin->backlog_packets -= packets; + flow->backlog -= bytes; + fq->backlog -= packets; + fq->memory_usage -= truesize; -static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) -{ - struct fq_flow *i; + if (flow->backlog) + return; - if (flow->backlog == 0) { - list_del_init(&flow->backlogchain); - } else { - i = flow; + if (flow == &tin->default_flow) { + list_del_init(&tin->tin_list); + return; + } - list_for_each_entry_continue(i, &fq->backlogs, backlogchain) - if (i->backlog < flow->backlog) - break; + idx = flow - fq->flows; + __clear_bit(idx, fq->flows_bitmap); +} - list_move_tail(&flow->backlogchain, - &i->backlogchain); - } +static void fq_adjust_removal(struct fq *fq, + struct fq_flow *flow, + struct sk_buff *skb) +{ + __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize); } static struct sk_buff *fq_flow_dequeue(struct fq *fq, @@ -54,11 +56,37 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, return NULL; fq_adjust_removal(fq, flow, skb); - fq_rejigger_backlog(fq, flow); return skb; } +static int fq_flow_drop(struct fq *fq, struct fq_flow *flow, + fq_skb_free_t free_func) +{ + unsigned int packets = 0, bytes = 0, truesize = 0; + struct fq_tin *tin = flow->tin; + struct sk_buff *skb; + int pending; + + lockdep_assert_held(&fq->lock); + + pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2); + do { + skb = __skb_dequeue(&flow->queue); + if (!skb) + break; + + packets++; + bytes += skb->len; + truesize += skb->truesize; + free_func(fq, tin, flow, skb); + } while (packets < pending); + + __fq_adjust_removal(fq, flow, packets, bytes, truesize); + + return packets; +} + static struct sk_buff *fq_tin_dequeue(struct fq *fq, struct fq_tin *tin, fq_tin_dequeue_t dequeue_func) @@ -115,8 +143,7 @@ static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) static struct fq_flow *fq_flow_classify(struct fq *fq, struct fq_tin *tin, u32 idx, - struct sk_buff *skb, - fq_flow_get_default_t get_default_func) + struct sk_buff *skb) { struct fq_flow *flow; @@ -124,7 +151,7 @@ static struct fq_flow *fq_flow_classify(struct fq *fq, flow = &fq->flows[idx]; if (flow->tin && flow->tin != tin) { - flow = get_default_func(fq, tin, idx, skb); + flow = &tin->default_flow; tin->collisions++; fq->collisions++; } @@ -135,36 +162,56 @@ static struct fq_flow *fq_flow_classify(struct fq *fq, return flow; } -static void fq_recalc_backlog(struct fq *fq, - struct fq_tin *tin, - struct fq_flow *flow) +static struct fq_flow *fq_find_fattest_flow(struct fq *fq) { - struct fq_flow *i; + struct fq_tin *tin; + struct fq_flow *flow = NULL; + u32 len = 0; + int i; - if (list_empty(&flow->backlogchain)) - list_add_tail(&flow->backlogchain, &fq->backlogs); + for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) { + struct fq_flow *cur = &fq->flows[i]; + unsigned int cur_len; - i = flow; - list_for_each_entry_continue_reverse(i, &fq->backlogs, - backlogchain) - if (i->backlog > flow->backlog) - break; + cur_len = cur->backlog; + if (cur_len <= len) + continue; + + flow = cur; + len = cur_len; + } + + list_for_each_entry(tin, &fq->tin_backlog, tin_list) { + unsigned int cur_len = tin->default_flow.backlog; - list_move(&flow->backlogchain, &i->backlogchain); + if (cur_len <= len) + continue; + + flow = &tin->default_flow; + len = cur_len; + } + + return flow; } static void fq_tin_enqueue(struct fq *fq, struct fq_tin *tin, u32 idx, struct sk_buff *skb, - fq_skb_free_t free_func, - fq_flow_get_default_t get_default_func) + fq_skb_free_t free_func) { struct fq_flow *flow; bool oom; lockdep_assert_held(&fq->lock); - flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); + flow = fq_flow_classify(fq, tin, idx, skb); + + if (!flow->backlog) { + if (flow != &tin->default_flow) + __set_bit(idx, fq->flows_bitmap); + else if (list_empty(&tin->tin_list)) + list_add(&tin->tin_list, &fq->tin_backlog); + } flow->tin = tin; flow->backlog += skb->len; @@ -173,8 +220,6 @@ static void fq_tin_enqueue(struct fq *fq, fq->memory_usage += skb->truesize; fq->backlog++; - fq_recalc_backlog(fq, tin, flow); - if (list_empty(&flow->flowchain)) { flow->deficit = fq->quantum; list_add_tail(&flow->flowchain, @@ -184,18 +229,13 @@ static void fq_tin_enqueue(struct fq *fq, __skb_queue_tail(&flow->queue, skb); oom = (fq->memory_usage > fq->memory_limit); while (fq->backlog > fq->limit || oom) { - flow = list_first_entry_or_null(&fq->backlogs, - struct fq_flow, - backlogchain); + flow = fq_find_fattest_flow(fq); if (!flow) return; - skb = fq_flow_dequeue(fq, flow); - if (!skb) + if (!fq_flow_drop(fq, flow, free_func)) return; - free_func(fq, flow->tin, flow, skb); - flow->tin->overlimit++; fq->overlimit++; if (oom) { @@ -224,8 +264,6 @@ static void fq_flow_filter(struct fq *fq, fq_adjust_removal(fq, flow, skb); free_func(fq, tin, flow, skb); } - - fq_rejigger_backlog(fq, flow); } static void fq_tin_filter(struct fq *fq, @@ -248,16 +286,18 @@ static void fq_flow_reset(struct fq *fq, struct fq_flow *flow, fq_skb_free_t free_func) { + struct fq_tin *tin = flow->tin; struct sk_buff *skb; while ((skb = fq_flow_dequeue(fq, flow))) - free_func(fq, flow->tin, flow, skb); + free_func(fq, tin, flow, skb); - if (!list_empty(&flow->flowchain)) + if (!list_empty(&flow->flowchain)) { list_del_init(&flow->flowchain); - - if (!list_empty(&flow->backlogchain)) - list_del_init(&flow->backlogchain); + if (list_empty(&tin->new_flows) && + list_empty(&tin->old_flows)) + list_del_init(&tin->tin_list); + } flow->tin = NULL; @@ -283,6 +323,7 @@ static void fq_tin_reset(struct fq *fq, fq_flow_reset(fq, flow, free_func); } + WARN_ON_ONCE(!list_empty(&tin->tin_list)); WARN_ON_ONCE(tin->backlog_bytes); WARN_ON_ONCE(tin->backlog_packets); } @@ -290,7 +331,6 @@ static void fq_tin_reset(struct fq *fq, static void fq_flow_init(struct fq_flow *flow) { INIT_LIST_HEAD(&flow->flowchain); - INIT_LIST_HEAD(&flow->backlogchain); __skb_queue_head_init(&flow->queue); } @@ -298,6 +338,8 @@ static void fq_tin_init(struct fq_tin *tin) { INIT_LIST_HEAD(&tin->new_flows); INIT_LIST_HEAD(&tin->old_flows); + INIT_LIST_HEAD(&tin->tin_list); + fq_flow_init(&tin->default_flow); } static int fq_init(struct fq *fq, int flows_cnt) @@ -305,8 +347,8 @@ static int fq_init(struct fq *fq, int flows_cnt) int i; memset(fq, 0, sizeof(fq[0])); - INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); + INIT_LIST_HEAD(&fq->tin_backlog); fq->flows_cnt = max_t(u32, flows_cnt, 1); fq->quantum = 300; fq->limit = 8192; @@ -316,6 +358,14 @@ static int fq_init(struct fq *fq, int flows_cnt) if (!fq->flows) return -ENOMEM; + fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long), + GFP_KERNEL); + if (!fq->flows_bitmap) { + kvfree(fq->flows); + fq->flows = NULL; + return -ENOMEM; + } + for (i = 0; i < fq->flows_cnt; i++) fq_flow_init(&fq->flows[i]); @@ -332,6 +382,9 @@ static void fq_reset(struct fq *fq, kvfree(fq->flows); fq->flows = NULL; + + kfree(fq->flows_bitmap); + fq->flows_bitmap = NULL; } #endif diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e55ec1597ce7..7cb3fa8310ed 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -14,6 +14,7 @@ */ struct genl_multicast_group { char name[GENL_NAMSIZ]; + u8 flags; }; struct genl_ops; diff --git a/include/net/gre.h b/include/net/gre.h index b60f212c16c6..4e209708b754 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -106,17 +106,6 @@ static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags) return flags; } -static inline __sum16 gre_checksum(struct sk_buff *skb) -{ - __wsum csum; - - if (skb->ip_summed == CHECKSUM_PARTIAL) - csum = lco_csum(skb); - else - csum = skb_checksum(skb, 0, skb->len, 0); - return csum_fold(csum); -} - static inline void gre_build_header(struct sk_buff *skb, int hdr_len, __be16 flags, __be16 proto, __be32 key, __be32 seq) @@ -146,7 +135,13 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len, !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; - *(__sum16 *)ptr = gre_checksum(skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + *(__sum16 *)ptr = csum_fold(lco_csum(skb)); + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = sizeof(*greh); + } } } } diff --git a/include/net/gro.h b/include/net/gro.h new file mode 100644 index 000000000000..8a6eb5303cc4 --- /dev/null +++ b/include/net/gro.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_IPV6_GRO_H +#define _NET_IPV6_GRO_H + +INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); +#endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index cb2818862919..cad2a611efde 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,6 +41,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); #define BIND_WITH_LOCK (1 << 1) /* Called from BPF program. */ #define BIND_FROM_BPF (1 << 2) +/* Skip CAP_NET_BIND_SERVICE check. */ +#define BIND_NO_CAP_NET_BIND_SERVICE (1 << 3) int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags); int inet_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7338b3865a2a..10a625760de9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -76,6 +76,8 @@ struct inet_connection_sock_af_ops { * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data * @icsk_mtup; MTU probing control data + * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack) + * @icsk_user_timeout: TCP_USER_TIMEOUT value */ struct inet_connection_sock { /* inet_sock has to be the first member! */ @@ -118,17 +120,18 @@ struct inet_connection_sock { __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; struct { - int enabled; - /* Range of MTUs to search */ int search_high; int search_low; /* Information on the current probe. */ - int probe_size; + u32 probe_size:31, + /* Is the MTUP feature enabled for this connection? */ + enabled:1; u32 probe_timestamp; } icsk_mtup; + u32 icsk_probes_tstamp; u32 icsk_user_timeout; u64 icsk_ca_priv[104 / sizeof(u64)]; @@ -138,7 +141,6 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ #define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ @@ -224,8 +226,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || - what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE || - what == ICSK_TIME_REO_TIMEOUT) { + what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index ac5ff3c3afb1..15b7fbe6b15c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -195,7 +195,8 @@ struct fib6_info { fib6_destroying:1, offload:1, trap:1, - unused:2; + offload_failed:1, + unused:1; struct rcu_head rcu; struct nexthop *nh; @@ -336,13 +337,6 @@ static inline void fib6_info_release(struct fib6_info *f6i) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } -static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload, - bool trap) -{ - f6i->offload = offload; - f6i->trap = trap; -} - enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, @@ -545,6 +539,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) { return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); } +void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, + bool offload, bool trap, bool offload_failed); #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2a5277758379..f51a118bfce8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct net_device *dev); struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, - struct net_device *dev, unsigned int pref); + struct net_device *dev, unsigned int pref, + u32 defrtr_usr_metric); void rt6_purge_dflt_routers(struct net *net); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 2ec062aaa978..a914f33f3ed5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -213,7 +213,8 @@ struct fib_rt_info { u8 type; u8 offload:1, trap:1, - unused:6; + offload_failed:1, + unused:5; }; struct fib_entry_notifier_info { diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d609e957a3ec..7cb5a1aace40 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1712,4 +1712,15 @@ ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) atomic_read(&dest->inactconns); } +#ifdef CONFIG_IP_VS_PROTO_TCP +INDIRECT_CALLABLE_DECLARE(int + tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP +INDIRECT_CALLABLE_DECLARE(int + udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif #endif /* _NET_IP_VS_H */ diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 9259ce2b22f3..ff06246dbbb9 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -128,11 +128,12 @@ struct iucv_sock { u8 flags; u16 msglimit; u16 msglimit_peer; + atomic_t skbs_in_xmit; atomic_t msg_sent; atomic_t msg_recv; atomic_t pendings; int transport; - void (*sk_txnotify)(struct sk_buff *skb, + void (*sk_txnotify)(struct sock *sk, enum iucv_tx_notify n); }; diff --git a/include/net/lapb.h b/include/net/lapb.h index ccc3d1f020b0..eee73442a1ba 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -92,6 +92,7 @@ struct lapb_cb { unsigned short n2, n2count; unsigned short t1, t2; struct timer_list t1timer, t2timer; + bool t1timer_stop, t2timer_stop; /* Internal control information */ struct sk_buff_head write_queue; @@ -103,6 +104,7 @@ struct lapb_cb { struct lapb_frame frmr_data; unsigned char frmr_type; + spinlock_t lock; refcount_t refcnt; }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d315740581f1..2d1d629e5d14 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1296,6 +1296,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * the "0-length PSDU" field included there. The value for it is * in &struct ieee80211_rx_status. Note that if this value isn't * known the frame shouldn't be reported. + * @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by + * hardware or driver) */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1328,6 +1330,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_HE_MU = BIT(27), RX_FLAG_RADIOTAP_LSIG = BIT(28), RX_FLAG_NO_PSDU = BIT(29), + RX_FLAG_8023 = BIT(30), }; /** @@ -1649,11 +1652,15 @@ enum ieee80211_vif_flags { * The driver supports sending frames passed as 802.3 frames by mac80211. * It must also support sending 802.11 packets for the same interface. * @IEEE80211_OFFLOAD_ENCAP_4ADDR: support 4-address mode encapsulation offload + * @IEEE80211_OFFLOAD_DECAP_ENABLED: rx encapsulation offload is enabled + * The driver supports passing received 802.11 frames as 802.3 frames to + * mac80211. */ enum ieee80211_offload_flags { IEEE80211_OFFLOAD_ENCAP_ENABLED = BIT(0), IEEE80211_OFFLOAD_ENCAP_4ADDR = BIT(1), + IEEE80211_OFFLOAD_DECAP_ENABLED = BIT(2), }; /** @@ -2389,6 +2396,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD: Hardware supports tx encapsulation * offload * + * @IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD: Hardware supports rx decapsulation + * offload + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2442,6 +2452,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID, IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT, IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD, + IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3880,6 +3891,9 @@ enum ieee80211_reconfig_type { * This callback may sleep. * @sta_set_4addr: Called to notify the driver when a station starts/stops using * 4-address mode + * @set_sar_specs: Update the SAR (TX power) settings. + * @sta_set_decap_offload: Called to notify the driver when a station is allowed + * to use rx decapsulation offload */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4197,6 +4211,9 @@ struct ieee80211_ops { struct ieee80211_sta *sta, bool enabled); int (*set_sar_specs)(struct ieee80211_hw *hw, const struct cfg80211_sar_specs *sar); + void (*sta_set_decap_offload)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, bool enabled); }; /** @@ -5512,7 +5529,7 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, void *data); /** - * ieee80211_iterate_active_interfaces_rtnl - iterate active interfaces + * ieee80211_iterate_active_interfaces_mtx - iterate active interfaces * * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. @@ -5523,12 +5540,12 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * @iterator: the iterator function to call, cannot sleep * @data: first argument of the iterator function */ -void ieee80211_iterate_active_interfaces_rtnl(struct ieee80211_hw *hw, - u32 iter_flags, - void (*iterator)(void *data, +void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, + u32 iter_flags, + void (*iterator)(void *data, u8 *mac, struct ieee80211_vif *vif), - void *data); + void *data); /** * ieee80211_iterate_stations_atomic - iterate stations diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 29567875f428..dcaee24a4d87 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -165,7 +165,7 @@ struct net { struct netns_xfrm xfrm; #endif - atomic64_t net_cookie; /* written once */ + u64 net_cookie; /* written once */ #if IS_ENABLED(CONFIG_IP_VS) struct netns_ipvs *ipvs; @@ -224,8 +224,6 @@ extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); struct net *get_net_ns_by_fd(int fd); -u64 __net_gen_cookie(struct net *net); - #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); void ipx_unregister_sysctl(void); diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 16e8b2f8d006..54c4d5c908a5 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -107,6 +107,10 @@ struct flow_offload_tuple { u8 l3proto; u8 l4proto; + + /* All members above are keys for lookups, see flow_offload_hash(). */ + struct { } __hash; + u8 dir; u16 mtu; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f4af8362d234..fdec57d862b7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -200,14 +200,13 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) } int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); -unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); -int nft_validate_register_load(enum nft_registers reg, unsigned int len); -int nft_validate_register_store(const struct nft_ctx *ctx, - enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type, unsigned int len); +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); /** * struct nft_userdata - user defined data associated with an object @@ -721,6 +720,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, const u32 *data, u64 timeout, u64 expiration, gfp_t gfp); +int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_expr *expr_array[]); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); @@ -1105,11 +1106,17 @@ struct nft_table { u16 family:6, flags:8, genmask:2; + u32 nlpid; char *name; u16 udlen; u8 *udata; }; +static inline bool nft_table_has_owner(const struct nft_table *table) +{ + return table->flags & NFT_TABLE_F_OWNER; +} + static inline bool nft_base_chain_netdev(int family, u32 hooknum) { return family == NFPROTO_NETDEV || diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8657e6815b07..fd10a7862fdc 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -26,21 +26,21 @@ void nf_tables_core_module_exit(void); struct nft_bitwise_fast_expr { u32 mask; u32 xor; - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; }; struct nft_cmp_fast_expr { u32 data; u32 mask; - enum nft_registers sreg:8; + u8 sreg; u8 len; bool inv; }; struct nft_immediate_expr { struct nft_data data; - enum nft_registers dreg:8; + u8 dreg; u8 dlen; }; @@ -60,14 +60,14 @@ struct nft_payload { enum nft_payload_bases base:8; u8 offset; u8 len; - enum nft_registers dreg:8; + u8 dreg; }; struct nft_payload_set { enum nft_payload_bases base:8; u8 offset; u8 len; - enum nft_registers sreg:8; + u8 sreg; u8 csum_type; u8 csum_offset; u8 csum_flags; diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 628b6fa579cd..237f3757637e 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -5,7 +5,7 @@ #include <net/netfilter/nf_tables.h> struct nft_fib { - enum nft_registers dreg:8; + u8 dreg; u8 result; u32 flags; }; diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 07e2fd507963..2dce55c736f4 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -7,8 +7,8 @@ struct nft_meta { enum nft_meta_keys key:8; union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; }; }; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8e4fcac4df72..70a2a085dd1a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -188,6 +188,8 @@ struct netns_ipv4 { int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; + int sysctl_fib_notify_on_flag_change; + #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5ec054473d81..21c0debbd39e 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -51,6 +51,7 @@ struct netns_sysctl_ipv6 { int max_hbh_opts_len; int seg6_flowlabel; bool skip_notify_on_dev_down; + int fib_notify_on_flag_change; }; struct netns_ipv6 { diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 226930d66b63..7bc057aee40b 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -66,7 +66,12 @@ struct nh_info { struct nh_grp_entry { struct nexthop *nh; u8 weight; - atomic_t upper_bound; + + union { + struct { + atomic_t upper_bound; + } mpath; + }; struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ @@ -109,6 +114,11 @@ enum nexthop_event_type { NEXTHOP_EVENT_REPLACE, }; +enum nh_notifier_info_type { + NH_NOTIFIER_INFO_TYPE_SINGLE, + NH_NOTIFIER_INFO_TYPE_GRP, +}; + struct nh_notifier_single_info { struct net_device *dev; u8 gw_family; @@ -137,7 +147,7 @@ struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; u32 id; - bool is_grp; + enum nh_notifier_info_type type; union { struct nh_notifier_single_info *nh; struct nh_notifier_grp_info *nh_grp; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f2a9c44171c..255e4f4b521f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -783,6 +783,42 @@ struct tc_mq_qopt_offload { }; }; +enum tc_htb_command { + /* Root */ + TC_HTB_CREATE, /* Initialize HTB offload. */ + TC_HTB_DESTROY, /* Destroy HTB offload. */ + + /* Classes */ + /* Allocate qid and create leaf. */ + TC_HTB_LEAF_ALLOC_QUEUE, + /* Convert leaf to inner, preserve and return qid, create new leaf. */ + TC_HTB_LEAF_TO_INNER, + /* Delete leaf, while siblings remain. */ + TC_HTB_LEAF_DEL, + /* Delete leaf, convert parent to leaf, preserving qid. */ + TC_HTB_LEAF_DEL_LAST, + /* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */ + TC_HTB_LEAF_DEL_LAST_FORCE, + /* Modify parameters of a node. */ + TC_HTB_NODE_MODIFY, + + /* Class qdisc */ + TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. */ +}; + +struct tc_htb_qopt_offload { + struct netlink_ext_ack *extack; + enum tc_htb_command command; + u16 classid; + u32 parent_classid; + u16 qid; + u16 moved_qid; + u64 rate; + u64 ceil; +}; + +#define TC_HTB_CLASSID_ROOT U32_MAX + enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 639e465a108f..2d6eb60c58c8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -210,7 +210,8 @@ struct Qdisc_class_ops { int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *, struct netlink_ext_ack *); - int (*delete)(struct Qdisc *, unsigned long); + int (*delete)(struct Qdisc *, unsigned long, + struct netlink_ext_ack *); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ @@ -388,6 +389,7 @@ struct qdisc_skb_cb { #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; u16 mru; + bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); @@ -551,14 +553,20 @@ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) return qdisc->dev_queue->dev; } -static inline void sch_tree_lock(const struct Qdisc *q) +static inline void sch_tree_lock(struct Qdisc *q) { - spin_lock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_lock_bh(qdisc_lock(q)); + else + spin_lock_bh(qdisc_root_sleeping_lock(q)); } -static inline void sch_tree_unlock(const struct Qdisc *q) +static inline void sch_tree_unlock(struct Qdisc *q) { - spin_unlock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_unlock_bh(qdisc_lock(q)); + else + spin_unlock_bh(qdisc_root_sleeping_lock(q)); } extern struct Qdisc noop_qdisc; @@ -1143,7 +1151,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) - qdisc_tree_flush_backlog(old); + qdisc_purge_queue(old); sch_tree_unlock(sch); return old; diff --git a/include/net/sock.h b/include/net/sock.h index bdc4323ce53c..636810ddcd9b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -226,7 +226,7 @@ struct sock_common { struct hlist_nulls_node skc_nulls_node; }; unsigned short skc_tx_queue_mapping; -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING unsigned short skc_rx_queue_mapping; #endif union { @@ -356,7 +356,7 @@ struct sock { #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING #define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping #endif @@ -1174,6 +1174,8 @@ struct proto { int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); + bool (*bpf_bypass_getsockopt)(int level, + int optname); void (*release_cb)(struct sock *sk); @@ -1350,14 +1352,18 @@ sk_memory_allocated_sub(struct sock *sk, int amt) atomic_long_sub(amt, sk->sk_prot->memory_allocated); } +#define SK_ALLOC_PERCPU_COUNTER_BATCH 16 + static inline void sk_sockets_allocated_dec(struct sock *sk) { - percpu_counter_dec(sk->sk_prot->sockets_allocated); + percpu_counter_add_batch(sk->sk_prot->sockets_allocated, -1, + SK_ALLOC_PERCPU_COUNTER_BATCH); } static inline void sk_sockets_allocated_inc(struct sock *sk) { - percpu_counter_inc(sk->sk_prot->sockets_allocated); + percpu_counter_add_batch(sk->sk_prot->sockets_allocated, 1, + SK_ALLOC_PERCPU_COUNTER_BATCH); } static inline u64 @@ -1834,7 +1840,7 @@ static inline int sk_tx_queue_get(const struct sock *sk) static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) { -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); @@ -1848,20 +1854,20 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) static inline void sk_rx_queue_clear(struct sock *sk) { -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING; #endif } -#ifdef CONFIG_XPS static inline int sk_rx_queue_get(const struct sock *sk) { +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING) return sk->sk_rx_queue_mapping; +#endif return -1; } -#endif static inline void sk_set_socket(struct sock *sk, struct socket *sock) { @@ -1921,10 +1927,13 @@ static inline void sk_set_txhash(struct sock *sk) sk->sk_txhash = net_tx_rndhash(); } -static inline void sk_rethink_txhash(struct sock *sk) +static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) + if (sk->sk_txhash) { sk_set_txhash(sk); + return true; + } + return false; } static inline struct dst_entry * @@ -1947,12 +1956,10 @@ sk_dst_get(struct sock *sk) return dst; } -static inline void dst_negative_advice(struct sock *sk) +static inline void __dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); - sk_rethink_txhash(sk); - if (dst && dst->ops->negative_advice) { ndst = dst->ops->negative_advice(dst); @@ -1964,6 +1971,12 @@ static inline void dst_negative_advice(struct sock *sk) } } +static inline void dst_negative_advice(struct sock *sk) +{ + sk_rethink_txhash(sk); + __dst_negative_advice(sk); +} + static inline void __sk_dst_set(struct sock *sk, struct dst_entry *dst) { diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 99cd538d6519..b7fc7d0f54e2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -16,20 +16,6 @@ #define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1) #define SWITCHDEV_F_DEFER BIT(2) -struct switchdev_trans { - bool ph_prepare; -}; - -static inline bool switchdev_trans_ph_prepare(struct switchdev_trans *trans) -{ - return trans && trans->ph_prepare; -} - -static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans) -{ - return trans && !trans->ph_prepare; -} - enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, SWITCHDEV_ATTR_ID_PORT_STP_STATE, @@ -41,10 +27,12 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, -#if IS_ENABLED(CONFIG_BRIDGE_MRP) - SWITCHDEV_ATTR_ID_MRP_PORT_STATE, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, -#endif +}; + +struct switchdev_brport_flags { + unsigned long val; + unsigned long mask; }; struct switchdev_attr { @@ -55,16 +43,13 @@ struct switchdev_attr { void (*complete)(struct net_device *dev, int err, void *priv); union { u8 stp_state; /* PORT_STP_STATE */ - unsigned long brport_flags; /* PORT_{PRE}_BRIDGE_FLAGS */ + struct switchdev_brport_flags brport_flags; /* PORT_BRIDGE_FLAGS */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ bool mc_disabled; /* MC_DISABLED */ -#if IS_ENABLED(CONFIG_BRIDGE_MRP) - u8 mrp_port_state; /* MRP_PORT_STATE */ u8 mrp_port_role; /* MRP_PORT_ROLE */ -#endif } u; }; @@ -73,7 +58,6 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_PORT_VLAN, SWITCHDEV_OBJ_ID_PORT_MDB, SWITCHDEV_OBJ_ID_HOST_MDB, -#if IS_ENABLED(CONFIG_BRIDGE_MRP) SWITCHDEV_OBJ_ID_MRP, SWITCHDEV_OBJ_ID_RING_TEST_MRP, SWITCHDEV_OBJ_ID_RING_ROLE_MRP, @@ -81,8 +65,6 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_IN_TEST_MRP, SWITCHDEV_OBJ_ID_IN_ROLE_MRP, SWITCHDEV_OBJ_ID_IN_STATE_MRP, - -#endif }; struct switchdev_obj { @@ -97,8 +79,7 @@ struct switchdev_obj { struct switchdev_obj_port_vlan { struct switchdev_obj obj; u16 flags; - u16 vid_begin; - u16 vid_end; + u16 vid; }; #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \ @@ -115,7 +96,6 @@ struct switchdev_obj_port_mdb { container_of((OBJ), struct switchdev_obj_port_mdb, obj) -#if IS_ENABLED(CONFIG_BRIDGE_MRP) /* SWITCHDEV_OBJ_ID_MRP */ struct switchdev_obj_mrp { struct switchdev_obj obj; @@ -147,6 +127,7 @@ struct switchdev_obj_ring_role_mrp { struct switchdev_obj obj; u8 ring_role; u32 ring_id; + u8 sw_backup; }; #define SWITCHDEV_OBJ_RING_ROLE_MRP(OBJ) \ @@ -181,6 +162,7 @@ struct switchdev_obj_in_role_mrp { u32 ring_id; u16 in_id; u8 in_role; + u8 sw_backup; }; #define SWITCHDEV_OBJ_IN_ROLE_MRP(OBJ) \ @@ -195,8 +177,6 @@ struct switchdev_obj_in_state_mrp { #define SWITCHDEV_OBJ_IN_STATE_MRP(OBJ) \ container_of((OBJ), struct switchdev_obj_in_state_mrp, obj) -#endif - typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); enum switchdev_notifier_type { @@ -234,14 +214,12 @@ struct switchdev_notifier_fdb_info { struct switchdev_notifier_port_obj_info { struct switchdev_notifier_info info; /* must be first */ const struct switchdev_obj *obj; - struct switchdev_trans *trans; bool handled; }; struct switchdev_notifier_port_attr_info { struct switchdev_notifier_info info; /* must be first */ const struct switchdev_attr *attr; - struct switchdev_trans *trans; bool handled; }; @@ -261,7 +239,8 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info) void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr); + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); @@ -289,7 +268,6 @@ int switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)); int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -302,7 +280,7 @@ int switchdev_handle_port_attr_set(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, const struct switchdev_attr *attr, - struct switchdev_trans *trans)); + struct netlink_ext_ack *extack)); #else static inline void switchdev_deferred_process(void) @@ -310,7 +288,8 @@ static inline void switchdev_deferred_process(void) } static inline int switchdev_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr) + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } @@ -373,7 +352,6 @@ switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)) { return 0; @@ -395,7 +373,7 @@ switchdev_handle_port_attr_set(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, const struct switchdev_attr *attr, - struct switchdev_trans *trans)) + struct netlink_ext_ack *extack)) { return 0; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 78d13c88720f..963cd86d12dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -403,6 +403,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +bool tcp_bpf_bypass_getsockopt(int level, int optname); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); @@ -630,6 +631,7 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); unsigned int tcp_current_mss(struct sock *sk); +u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) @@ -1430,12 +1432,29 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied); */ static inline bool tcp_rmem_pressure(const struct sock *sk) { - int rcvbuf = READ_ONCE(sk->sk_rcvbuf); - int threshold = rcvbuf - (rcvbuf >> 3); + int rcvbuf, threshold; + + if (tcp_under_memory_pressure(sk)) + return true; + + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + threshold = rcvbuf - (rcvbuf >> 3); return atomic_read(&sk->sk_rmem_alloc) > threshold; } +static inline bool tcp_epollin_ready(const struct sock *sk, int target) +{ + const struct tcp_sock *tp = tcp_sk(sk); + int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); + + if (avail <= 0) + return false; + + return (avail >= target) || tcp_rmem_pressure(sk) || + (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss); +} + extern void tcp_openreq_init_rwin(struct request_sock *req, const struct sock *sk_listener, const struct dst_entry *dst); @@ -2060,7 +2079,7 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb); void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); -extern void tcp_rack_mark_lost(struct sock *sk); +extern bool tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); diff --git a/include/net/udp.h b/include/net/udp.h index 877832bed471..a132a02b2f2c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -173,12 +173,15 @@ INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); + struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, struct sock *sk); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features); + netdev_features_t features, bool is_ipv6); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { @@ -467,6 +470,7 @@ void udp_init(void); DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); +void udp_encap_disable(void); #if IS_ENABLED(CONFIG_IPV6) DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 2ea453dac876..afc7ce713657 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -129,12 +129,16 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); static inline void udp_tunnel_get_rx_info(struct net_device *dev) { ASSERT_RTNL(); + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } static inline void udp_tunnel_drop_rx_info(struct net_device *dev) { ASSERT_RTNL(); + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); } @@ -177,9 +181,8 @@ static inline void udp_tunnel_encap_enable(struct socket *sock) #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); - else #endif - udp_encap_enable(); + udp_encap_enable(); } #define UDP_TUNNEL_NIC_MAX_TABLES 4 @@ -323,6 +326,8 @@ udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, static inline void udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) { + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->add_port(dev, ti); } @@ -330,6 +335,8 @@ udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) static inline void udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) { + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->del_port(dev, ti); } diff --git a/include/net/xdp.h b/include/net/xdp.h index 600acb307db6..a5bc214a49d9 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -76,6 +76,25 @@ struct xdp_buff { u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ }; +static __always_inline void +xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) +{ + xdp->frame_sz = frame_sz; + xdp->rxq = rxq; +} + +static __always_inline void +xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, + int headroom, int data_len, const bool meta_valid) +{ + unsigned char *data = hard_start + headroom; + + xdp->data_hard_start = hard_start; + xdp->data = data; + xdp->data_end = data + data_len; + xdp->data_meta = meta_valid ? data : data + 1; +} + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the @@ -145,6 +164,12 @@ void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); +struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, + struct sk_buff *skb, + struct net_device *dev); +struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf, + struct net_device *dev); +int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp); static inline void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) |