Diffstat (limited to 'net')
280 files changed, 7724 insertions, 5284 deletions
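One pattern worth calling out before the diff body: the very first hunk (net/8021q/vlan_netlink.c) replaces an open-coded "attribute present? read it : fall back" branch with nla_get_be16_default(). A minimal sketch of the helper's likely shape, assuming it mirrors the other nla_get_*_default() accessors in include/net/netlink.h:

static inline __be16 nla_get_be16_default(const struct nlattr *nla,
					  __be16 defvalue)
{
	/* Attribute absent from the message: use the caller's default. */
	if (!nla)
		return defvalue;

	return nla_get_be16(nla);
}

which collapses the vlan_newlink() protocol lookup to a single statement:

	proto = nla_get_be16_default(data[IFLA_VLAN_PROTOCOL],
				     htons(ETH_P_8021Q));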
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 458040e8a0e0..91d134961357 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -725,7 +725,7 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int vlan_dev_netpoll_setup(struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index cf5219df7903..134419667d59 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -161,10 +161,8 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -ENODEV; } - if (data[IFLA_VLAN_PROTOCOL]) - proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]); - else - proto = htons(ETH_P_8021Q); + proto = nla_get_be16_default(data[IFLA_VLAN_PROTOCOL], + htons(ETH_P_8021Q)); vlan->vlan_proto = proto; vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); diff --git a/net/Kconfig b/net/Kconfig index a629f92dc86b..c3fca69a7c83 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -72,6 +72,9 @@ config NET_DEVMEM depends on GENERIC_ALLOCATOR depends on PAGE_POOL +config NET_SHAPER + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/Kconfig.debug b/net/Kconfig.debug index 5e3fffe707dd..277fab8c4d77 100644 --- a/net/Kconfig.debug +++ b/net/Kconfig.debug @@ -24,3 +24,18 @@ config DEBUG_NET help Enable extra sanity checks in networking. This is mostly used by fuzzers, but is safe to select. + +config DEBUG_NET_SMALL_RTNL + bool "Add extra per-netns mutex inside RTNL" + depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT + select PROVE_LOCKING + default n + help + rtnl_lock() is being replaced with rtnl_net_lock() that + acquires the global RTNL and a small per-netns RTNL mutex. + + During the conversion, rtnl_net_lock() just adds an extra + mutex in every RTNL scope and slows down the operations. + + Once the conversion completes, rtnl_lock() will be removed + and rtnetlink will gain per-netns scalability. diff --git a/net/Makefile b/net/Makefile index 65bb8c72a35e..60ed5190eda8 100644 --- a/net/Makefile +++ b/net/Makefile @@ -79,3 +79,4 @@ obj-$(CONFIG_XDP_SOCKETS) += xdp/ obj-$(CONFIG_MPTCP) += mptcp/ obj-$(CONFIG_MCTP) += mctp/ obj-$(CONFIG_NET_HANDSHAKE) += handshake/ +obj-$(CONFIG_NET_SHAPER) += shaper/ diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile index 33164d972d37..152312a15180 100644 --- a/net/appletalk/Makefile +++ b/net/appletalk/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_ATALK) += appletalk.o -appletalk-y := aarp.o ddp.o dev.o +appletalk-y := aarp.o ddp.o appletalk-$(CONFIG_PROC_FS) += atalk_proc.o appletalk-$(CONFIG_SYSCTL) += sysctl_net_atalk.o diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c deleted file mode 100644 index 284c8e585533..000000000000 --- a/net/appletalk/dev.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Moved here from drivers/net/net_init.c, which is: - * Written 1993,1994,1995 by Donald Becker. - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/if_ltalk.h> - -static void ltalk_setup(struct net_device *dev) -{ - /* Fill in the fields of the device structure with localtalk-generic values. 
*/ - - dev->type = ARPHRD_LOCALTLK; - dev->hard_header_len = LTALK_HLEN; - dev->mtu = LTALK_MTU; - dev->addr_len = LTALK_ALEN; - dev->tx_queue_len = 10; - - dev->broadcast[0] = 0xFF; - - dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP; -} - -/** - * alloc_ltalkdev - Allocates and sets up an localtalk device - * @sizeof_priv: Size of additional driver-private structure to be allocated - * for this localtalk device - * - * Fill in the fields of the device structure with localtalk-generic - * values. Basically does everything except registering the device. - * - * Constructs a new net device, complete with a private data area of - * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for - * this private data area. - */ - -struct net_device *alloc_ltalkdev(int sizeof_priv) -{ - return alloc_netdev(sizeof_priv, "lt%d", NET_NAME_UNKNOWN, - ltalk_setup); -} -EXPORT_SYMBOL(alloc_ltalkdev); diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 74b49c35ddc1..07ae5dd1f150 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -36,6 +36,7 @@ #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/types.h> #include <linux/workqueue.h> #include <net/genetlink.h> @@ -371,8 +372,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, batadv_ogm_packet->ttl, - ((batadv_ogm_packet->flags & BATADV_DIRECTLINK) ? - "on" : "off"), + str_on_off(batadv_ogm_packet->flags & BATADV_DIRECTLINK), hard_iface->net_dev->name, hard_iface->net_dev->dev_addr); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5f46ca3d4bb8..449faf5a5487 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -33,6 +33,7 @@ #include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/workqueue.h> #include <net/arp.h> #include <net/genetlink.h> @@ -1946,16 +1947,15 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, claim = batadv_claim_hash_find(bat_priv, &search_claim); if (!claim) { + bool local = batadv_is_my_client(bat_priv, ethhdr->h_source, vid); + /* possible optimization: race for a claim */ /* No claim exists yet, claim it for us! */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): Unclaimed MAC %pM found. Claim it. Local: %s\n", - __func__, ethhdr->h_source, - batadv_is_my_client(bat_priv, - ethhdr->h_source, vid) ? - "yes" : "no"); + __func__, ethhdr->h_source, str_yes_no(local)); batadv_handle_claim(bat_priv, primary_if, primary_if->net_dev->dev_addr, ethhdr->h_source, vid); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 3d4c36ae2e1a..97ea71a052f8 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2024.2" +#define BATADV_SOURCE_VERSION "2024.3" #endif /* B.A.T.M.A.N. 
parameters */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2243cec18ecc..b44c382226a1 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -28,6 +28,7 @@ #include <linux/net.h> #include <linux/netdevice.h> #include <linux/netlink.h> +#include <linux/overflow.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> @@ -209,20 +210,6 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, } /** - * batadv_tt_local_entry_free_rcu() - free the tt_local_entry - * @rcu: rcu pointer of the tt_local_entry - */ -static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_local_entry *tt_local_entry; - - tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, - common.rcu); - - kmem_cache_free(batadv_tl_cache, tt_local_entry); -} - -/** * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node @@ -236,7 +223,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) batadv_softif_vlan_put(tt_local_entry->vlan); - call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); + kfree_rcu(tt_local_entry, common.rcu); } /** @@ -255,20 +242,6 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) } /** - * batadv_tt_global_entry_free_rcu() - free the tt_global_entry - * @rcu: rcu pointer of the tt_global_entry - */ -static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_global_entry *tt_global_entry; - - tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, - common.rcu); - - kmem_cache_free(batadv_tg_cache, tt_global_entry); -} - -/** * batadv_tt_global_entry_release() - release tt_global_entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the nc_node @@ -282,7 +255,7 @@ void batadv_tt_global_entry_release(struct kref *ref) batadv_tt_global_del_orig_list(tt_global_entry); - call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); + kfree_rcu(tt_global_entry, common.rcu); } /** @@ -408,19 +381,6 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, } /** - * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry - * @rcu: rcu pointer of the orig_entry - */ -static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_orig_list_entry *orig_entry; - - orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); - - kmem_cache_free(batadv_tt_orig_cache, orig_entry); -} - -/** * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the tt orig entry @@ -433,7 +393,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) refcount); batadv_orig_node_put(orig_entry->orig_node); - call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); + kfree_rcu(orig_entry, rcu); } /** @@ -856,8 +816,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, num_entries += atomic_read(&vlan->tt.num_entries); } - change_offset = sizeof(**tt_data); - change_offset += num_vlan * sizeof(*tt_vlan); + change_offset = struct_size(*tt_data, vlan_data, num_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) @@ -876,7 +835,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, (*tt_data)->ttvn = 
atomic_read(&orig_node->last_ttvn); (*tt_data)->num_vlan = htons(num_vlan); - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); + tt_vlan = (*tt_data)->vlan_data; hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); @@ -936,8 +895,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, total_entries += vlan_entries; } - change_offset = sizeof(**tt_data); - change_offset += num_vlan * sizeof(*tt_vlan); + change_offset = struct_size(*tt_data, vlan_data, num_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) @@ -956,7 +914,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->ttvn = atomic_read(&bat_priv->tt.vn); (*tt_data)->num_vlan = htons(num_vlan); - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); + tt_vlan = (*tt_data)->vlan_data; hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) @@ -2916,7 +2874,6 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tt_req_node *tt_req_node = NULL; - struct batadv_tvlv_tt_vlan_data *tt_vlan_req; struct batadv_hard_iface *primary_if; bool ret = false; int i, size; @@ -2932,7 +2889,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - size = sizeof(*tvlv_tt_data) + sizeof(*tt_vlan_req) * num_vlan; + size = struct_size(tvlv_tt_data, vlan_data, num_vlan); tvlv_tt_data = kzalloc(size, GFP_ATOMIC); if (!tvlv_tt_data) goto out; @@ -2944,12 +2901,10 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, /* send all the CRCs within the request. 
This is needed by intermediate * nodes to ensure they have the correct table before replying */ - tt_vlan_req = (struct batadv_tvlv_tt_vlan_data *)(tvlv_tt_data + 1); for (i = 0; i < num_vlan; i++) { - tt_vlan_req->vid = tt_vlan->vid; - tt_vlan_req->crc = tt_vlan->crc; + tvlv_tt_data->vlan_data[i].vid = tt_vlan->vid; + tvlv_tt_data->vlan_data[i].crc = tt_vlan->crc; - tt_vlan_req++; tt_vlan++; } @@ -3001,7 +2956,6 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *res_dst_orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; - struct batadv_tvlv_tt_vlan_data *tt_vlan; bool ret = false, full_table; u8 orig_ttvn, req_ttvn; u16 tvlv_len; @@ -3024,10 +2978,9 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, orig_ttvn = (u8)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_data->ttvn; - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); /* this node doesn't have the requested data */ if (orig_ttvn != req_ttvn || - !batadv_tt_global_check_crc(req_dst_orig_node, tt_vlan, + !batadv_tt_global_check_crc(req_dst_orig_node, tt_data->vlan_data, ntohs(tt_data->num_vlan))) goto out; @@ -3370,7 +3323,6 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; u8 *tvlv_ptr = (u8 *)tt_data; - u16 change_offset; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n", @@ -3383,10 +3335,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->tt_lock); - change_offset = sizeof(struct batadv_tvlv_tt_vlan_data); - change_offset *= ntohs(tt_data->num_vlan); - change_offset += sizeof(*tt_data); - tvlv_ptr += change_offset; + tvlv_ptr += struct_size(tt_data, vlan_data, ntohs(tt_data->num_vlan)); tt_change = (struct batadv_tvlv_tt_change *)tvlv_ptr; if (tt_data->flags & BATADV_TT_FULL_TABLE) { @@ -3985,10 +3934,10 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, u8 flags, void *tvlv_value, u16 tvlv_value_len) { - struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; u16 num_entries, num_vlan; + size_t flex_size; if (tvlv_value_len < sizeof(*tt_data)) return; @@ -3998,17 +3947,18 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, num_vlan = ntohs(tt_data->num_vlan); - if (tvlv_value_len < sizeof(*tt_vlan) * num_vlan) + flex_size = flex_array_size(tt_data, vlan_data, num_vlan); + if (tvlv_value_len < flex_size) return; - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); - tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan); - tvlv_value_len -= sizeof(*tt_vlan) * num_vlan; + tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data + + flex_size); + tvlv_value_len -= flex_size; num_entries = batadv_tt_entries(tvlv_value_len); - batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change, - num_entries, tt_data->ttvn); + batadv_tt_update_orig(bat_priv, orig, tt_data->vlan_data, num_vlan, + tt_change, num_entries, tt_data->ttvn); } /** @@ -4039,8 +3989,8 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, tt_data = tvlv_value; tvlv_value_len -= sizeof(*tt_data); - tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data); - tt_vlan_len *= ntohs(tt_data->num_vlan); + tt_vlan_len = flex_array_size(tt_data, vlan_data, + 
ntohs(tt_data->num_vlan)); if (tvlv_value_len < tt_vlan_len) return NET_RX_SUCCESS; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c4c74b82ed21..d097e308a755 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -952,6 +952,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t conn->tx_power = HCI_TX_POWER_INVALID; conn->max_tx_power = HCI_TX_POWER_INVALID; conn->sync_handle = HCI_SYNC_HANDLE_INVALID; + conn->sid = HCI_SID_INVALID; set_bit(HCI_CONN_POWER_SAVE, &conn->flags); conn->disc_timeout = HCI_DISCONN_TIMEOUT; @@ -1127,9 +1128,9 @@ void hci_conn_del(struct hci_conn *conn) hci_conn_unlink(conn); - cancel_delayed_work_sync(&conn->disc_work); - cancel_delayed_work_sync(&conn->auto_accept_work); - cancel_delayed_work_sync(&conn->idle_work); + disable_delayed_work_sync(&conn->disc_work); + disable_delayed_work_sync(&conn->auto_accept_work); + disable_delayed_work_sync(&conn->idle_work); if (conn->type == ACL_LINK) { /* Unacked frames */ @@ -2062,105 +2063,217 @@ static int create_big_sync(struct hci_dev *hdev, void *data) static void create_pa_complete(struct hci_dev *hdev, void *data, int err) { - struct hci_cp_le_pa_create_sync *cp = data; - bt_dev_dbg(hdev, ""); if (err) bt_dev_err(hdev, "Unable to create PA: %d", err); +} - kfree(cp); +static bool hci_conn_check_create_pa_sync(struct hci_conn *conn) +{ + if (conn->type != ISO_LINK || conn->sid == HCI_SID_INVALID) + return false; + + return true; } static int create_pa_sync(struct hci_dev *hdev, void *data) { - struct hci_cp_le_pa_create_sync *cp = data; - int err; + struct hci_cp_le_pa_create_sync cp = {0}; + struct hci_conn *conn; + int err = 0; - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC, - sizeof(*cp), cp, HCI_CMD_TIMEOUT); - if (err) { - hci_dev_clear_flag(hdev, HCI_PA_SYNC); - return err; + hci_dev_lock(hdev); + + rcu_read_lock(); + + /* The spec allows only one pending LE Periodic Advertising Create + * Sync command at a time. If the command is pending now, don't do + * anything. We check for pending connections after each PA Sync + * Established event. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2493: + * + * If the Host issues this command when another HCI_LE_Periodic_ + * Advertising_Create_Sync command is pending, the Controller shall + * return the error code Command Disallowed (0x0C). 
+ */ + list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { + if (test_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags)) + goto unlock; } - return hci_update_passive_scan_sync(hdev); + list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { + if (hci_conn_check_create_pa_sync(conn)) { + struct bt_iso_qos *qos = &conn->iso_qos; + + cp.options = qos->bcast.options; + cp.sid = conn->sid; + cp.addr_type = conn->dst_type; + bacpy(&cp.addr, &conn->dst); + cp.skip = cpu_to_le16(qos->bcast.skip); + cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); + cp.sync_cte_type = qos->bcast.sync_cte_type; + + break; + } + } + +unlock: + rcu_read_unlock(); + + hci_dev_unlock(hdev); + + if (bacmp(&cp.addr, BDADDR_ANY)) { + hci_dev_set_flag(hdev, HCI_PA_SYNC); + set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); + + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); + if (!err) + err = hci_update_passive_scan_sync(hdev); + + if (err) { + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); + } + } + + return err; +} + +int hci_pa_create_sync_pending(struct hci_dev *hdev) +{ + /* Queue start pa_create_sync and scan */ + return hci_cmd_sync_queue(hdev, create_pa_sync, + NULL, create_pa_complete); } struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos) { - struct hci_cp_le_pa_create_sync *cp; struct hci_conn *conn; - int err; - - if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) - return ERR_PTR(-EBUSY); conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE); if (IS_ERR(conn)) return conn; conn->iso_qos = *qos; + conn->dst_type = dst_type; + conn->sid = sid; conn->state = BT_LISTEN; hci_conn_hold(conn); - cp = kzalloc(sizeof(*cp), GFP_KERNEL); - if (!cp) { - hci_dev_clear_flag(hdev, HCI_PA_SYNC); - hci_conn_drop(conn); - return ERR_PTR(-ENOMEM); + hci_pa_create_sync_pending(hdev); + + return conn; +} + +static bool hci_conn_check_create_big_sync(struct hci_conn *conn) +{ + if (!conn->num_bis) + return false; + + return true; +} + +static void big_create_sync_complete(struct hci_dev *hdev, void *data, int err) +{ + bt_dev_dbg(hdev, ""); + + if (err) + bt_dev_err(hdev, "Unable to create BIG sync: %d", err); +} + +static int big_create_sync(struct hci_dev *hdev, void *data) +{ + DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11); + struct hci_conn *conn; + + rcu_read_lock(); + + pdu->num_bis = 0; + + /* The spec allows only one pending LE BIG Create Sync command at + * a time. If the command is pending now, don't do anything. We + * check for pending connections after each BIG Sync Established + * event. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2586: + * + * If the Host sends this command when the Controller is in the + * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_ + * Established event has not been generated, the Controller shall + * return the error code Command Disallowed (0x0C). 
+ */ + list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { + if (test_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags)) + goto unlock; } - cp->options = qos->bcast.options; - cp->sid = sid; - cp->addr_type = dst_type; - bacpy(&cp->addr, dst); - cp->skip = cpu_to_le16(qos->bcast.skip); - cp->sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); - cp->sync_cte_type = qos->bcast.sync_cte_type; + list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { + if (hci_conn_check_create_big_sync(conn)) { + struct bt_iso_qos *qos = &conn->iso_qos; - /* Queue start pa_create_sync and scan */ - err = hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete); - if (err < 0) { - hci_conn_drop(conn); - kfree(cp); - return ERR_PTR(err); + set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); + + pdu->handle = qos->bcast.big; + pdu->sync_handle = cpu_to_le16(conn->sync_handle); + pdu->encryption = qos->bcast.encryption; + memcpy(pdu->bcode, qos->bcast.bcode, + sizeof(pdu->bcode)); + pdu->mse = qos->bcast.mse; + pdu->timeout = cpu_to_le16(qos->bcast.timeout); + pdu->num_bis = conn->num_bis; + memcpy(pdu->bis, conn->bis, conn->num_bis); + + break; + } } - return conn; +unlock: + rcu_read_unlock(); + + if (!pdu->num_bis) + return 0; + + return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, + struct_size(pdu, bis, pdu->num_bis), pdu); +} + +int hci_le_big_create_sync_pending(struct hci_dev *hdev) +{ + /* Queue big_create_sync */ + return hci_cmd_sync_queue_once(hdev, big_create_sync, + NULL, big_create_sync_complete); } int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, struct bt_iso_qos *qos, __u16 sync_handle, __u8 num_bis, __u8 bis[]) { - DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11); int err; - if (num_bis < 0x01 || num_bis > pdu->num_bis) + if (num_bis < 0x01 || num_bis > ISO_MAX_NUM_BIS) return -EINVAL; err = qos_set_big(hdev, qos); if (err) return err; - if (hcon) - hcon->iso_qos.bcast.big = qos->bcast.big; + if (hcon) { + /* Update hcon QoS */ + hcon->iso_qos = *qos; - pdu->handle = qos->bcast.big; - pdu->sync_handle = cpu_to_le16(sync_handle); - pdu->encryption = qos->bcast.encryption; - memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode)); - pdu->mse = qos->bcast.mse; - pdu->timeout = cpu_to_le16(qos->bcast.timeout); - pdu->num_bis = num_bis; - memcpy(pdu->bis, bis, num_bis); + hcon->num_bis = num_bis; + memcpy(hcon->bis, bis, num_bis); + } - return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, - struct_size(pdu, bis, num_bis), pdu); + return hci_le_big_create_sync_pending(hdev); } static void create_big_complete(struct hci_dev *hdev, void *data, int err) @@ -2224,13 +2337,9 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, conn->iso_qos.bcast.big); if (parent && parent != conn) { link = hci_conn_link(parent, conn); - if (!link) { - hci_conn_drop(conn); - return ERR_PTR(-ENOLINK); - } - - /* Link takes the refcount */ hci_conn_drop(conn); + if (!link) + return ERR_PTR(-ENOLINK); } return conn; @@ -2320,15 +2429,12 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, } link = hci_conn_link(le, cis); + hci_conn_drop(cis); if (!link) { hci_conn_drop(le); - hci_conn_drop(cis); return ERR_PTR(-ENOLINK); } - /* Link takes the refcount */ - hci_conn_drop(cis); - cis->state = BT_CONNECT; hci_le_create_cis_pending(hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0ac354db8177..f9e19f9cb5a3 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3771,18 +3771,22 @@ static 
void hci_tx_work(struct work_struct *work) /* ACL data packet */ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_acl_hdr *hdr = (void *) skb->data; + struct hci_acl_hdr *hdr; struct hci_conn *conn; __u16 handle, flags; - skb_pull(skb, HCI_ACL_HDR_SIZE); + hdr = skb_pull_data(skb, sizeof(*hdr)); + if (!hdr) { + bt_dev_err(hdev, "ACL packet too small"); + goto drop; + } handle = __le16_to_cpu(hdr->handle); flags = hci_flags(handle); handle = hci_handle(handle); - BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len, - handle, flags); + bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len, + handle, flags); hdev->stat.acl_rx++; @@ -3801,24 +3805,29 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) handle); } +drop: kfree_skb(skb); } /* SCO data packet */ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_sco_hdr *hdr = (void *) skb->data; + struct hci_sco_hdr *hdr; struct hci_conn *conn; __u16 handle, flags; - skb_pull(skb, HCI_SCO_HDR_SIZE); + hdr = skb_pull_data(skb, sizeof(*hdr)); + if (!hdr) { + bt_dev_err(hdev, "SCO packet too small"); + goto drop; + } handle = __le16_to_cpu(hdr->handle); flags = hci_flags(handle); handle = hci_handle(handle); - BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len, - handle, flags); + bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len, + handle, flags); hdev->stat.sco_rx++; @@ -3836,6 +3845,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) handle); } +drop: kfree_skb(skb); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7b35c58bbbeb..1427d6e2f3c9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3624,6 +3624,13 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, goto unlock; } + /* We skip the WRITE_AUTH_PAYLOAD_TIMEOUT for ATS2851 based controllers + * to avoid unexpected SMP command errors when pairing. + */ + if (test_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT, + &hdev->quirks)) + goto notify; + /* Set the default Authenticated Payload Timeout after * an LE Link is established. 
As per Core Spec v5.0, Vol 2, Part B * Section 3.3, the HCI command WRITE_AUTH_PAYLOAD_TIMEOUT should be @@ -6343,7 +6350,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, struct hci_ev_le_pa_sync_established *ev = data; int mask = hdev->link_mode; __u8 flags = 0; - struct hci_conn *pa_sync; + struct hci_conn *pa_sync, *conn; bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); @@ -6351,6 +6358,20 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_PA_SYNC); + conn = hci_conn_hash_lookup_sid(hdev, ev->sid, &ev->bdaddr, + ev->bdaddr_type); + if (!conn) { + bt_dev_err(hdev, + "Unable to find connection for dst %pMR sid 0x%2.2x", + &ev->bdaddr, ev->sid); + goto unlock; + } + + clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); + + conn->sync_handle = le16_to_cpu(ev->handle); + conn->sid = HCI_SID_INVALID; + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) { hci_le_pa_term_sync(hdev, ev->handle); @@ -6377,6 +6398,9 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, } unlock: + /* Handle any other pending PA sync command */ + hci_pa_create_sync_pending(hdev); + hci_dev_unlock(hdev); } @@ -6894,7 +6918,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_evt_le_big_sync_estabilished *ev = data; - struct hci_conn *bis; + struct hci_conn *bis, *conn; int i; bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); @@ -6905,6 +6929,20 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_big_sync_pend(hdev, ev->handle, + ev->num_bis); + if (!conn) { + bt_dev_err(hdev, + "Unable to find connection for big 0x%2.2x", + ev->handle); + goto unlock; + } + + clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); + + conn->num_bis = 0; + memset(conn->bis, 0, sizeof(conn->num_bis)); + for (i = 0; i < ev->num_bis; i++) { u16 handle = le16_to_cpu(ev->bis[i]); __le32 interval; @@ -6925,6 +6963,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, /* Mark PA sync as established */ set_bit(HCI_CONN_PA_SYNC, &bis->flags); + bis->sync_handle = conn->sync_handle; bis->iso_qos.bcast.big = ev->handle; memset(&interval, 0, sizeof(interval)); memcpy(&interval, ev->latency, sizeof(ev->latency)); @@ -6954,6 +6993,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, hci_connect_cfm(bis, ev->status); } +unlock: + /* Handle any other pending BIG sync command */ + hci_le_big_create_sync_pending(hdev); + hci_dev_unlock(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c0203a2b5107..c86f4e42e69c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4842,6 +4842,13 @@ static const struct { HCI_QUIRK_BROKEN(SET_RPA_TIMEOUT, "HCI LE Set Random Private Address Timeout command is " "advertised, but not supported."), + HCI_QUIRK_BROKEN(EXT_CREATE_CONN, + "HCI LE Extended Create Connection command is " + "advertised, but not supported."), + HCI_QUIRK_BROKEN(WRITE_AUTH_PAYLOAD_TIMEOUT, + "HCI WRITE AUTH PAYLOAD TIMEOUT command leads " + "to unexpected SMP errors when pairing " + "and will not be used."), HCI_QUIRK_BROKEN(LE_CODED, "HCI LE Coded PHY feature bit is set, " "but its usage is not supported.") @@ -6477,7 +6484,7 @@ static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data) &own_addr_type); if (err) goto done; - + /* Send 
command LE Extended Create Connection if supported */ if (use_ext_conn(hdev)) { err = hci_le_ext_create_conn_sync(hdev, conn, own_addr_type); goto done; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 367e32fe30eb..4b54dbbf0729 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -21,16 +21,6 @@ static const struct device_type bt_link = { .release = bt_link_release, }; -/* - * The rfcomm tty device will possibly retain even when conn - * is down, and sysfs doesn't support move zombie device, - * so we should move the device before conn device is destroyed. - */ -static int __match_tty(struct device *dev, void *data) -{ - return !strncmp(dev_name(dev), "rfcomm", 6); -} - void hci_conn_init_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -73,10 +63,13 @@ void hci_conn_del_sysfs(struct hci_conn *conn) return; } + /* If there are devices using the connection as parent reset it to NULL + * before unregistering the device. + */ while (1) { struct device *dev; - dev = device_find_child(&conn->dev, NULL, __match_tty); + dev = device_find_any_child(&conn->dev); if (!dev) break; device_move(dev, NULL, DPM_ORDER_DEV_LAST); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 7a83e400ac77..1b40fd2b2f02 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -35,6 +35,7 @@ struct iso_conn { struct sk_buff *rx_skb; __u32 rx_len; __u16 tx_sn; + struct kref ref; }; #define iso_conn_lock(c) spin_lock(&(c)->lock) @@ -93,6 +94,49 @@ static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst, #define ISO_CONN_TIMEOUT (HZ * 40) #define ISO_DISCONN_TIMEOUT (HZ * 2) +static void iso_conn_free(struct kref *ref) +{ + struct iso_conn *conn = container_of(ref, struct iso_conn, ref); + + BT_DBG("conn %p", conn); + + if (conn->sk) + iso_pi(conn->sk)->conn = NULL; + + if (conn->hcon) { + conn->hcon->iso_data = NULL; + hci_conn_drop(conn->hcon); + } + + /* Ensure no more work items will run since hci_conn has been dropped */ + disable_delayed_work_sync(&conn->timeout_work); + + kfree(conn); +} + +static void iso_conn_put(struct iso_conn *conn) +{ + if (!conn) + return; + + BT_DBG("conn %p refcnt %d", conn, kref_read(&conn->ref)); + + kref_put(&conn->ref, iso_conn_free); +} + +static struct iso_conn *iso_conn_hold_unless_zero(struct iso_conn *conn) +{ + if (!conn) + return NULL; + + BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref)); + + if (!kref_get_unless_zero(&conn->ref)) + return NULL; + + return conn; +} + static struct sock *iso_sock_hold(struct iso_conn *conn) { if (!conn || !bt_sock_linked(&iso_sk_list, conn->sk)) @@ -109,9 +153,14 @@ static void iso_sock_timeout(struct work_struct *work) timeout_work.work); struct sock *sk; + conn = iso_conn_hold_unless_zero(conn); + if (!conn) + return; + iso_conn_lock(conn); sk = iso_sock_hold(conn); iso_conn_unlock(conn); + iso_conn_put(conn); if (!sk) return; @@ -149,9 +198,14 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon) { struct iso_conn *conn = hcon->iso_data; + conn = iso_conn_hold_unless_zero(conn); if (conn) { - if (!conn->hcon) + if (!conn->hcon) { + iso_conn_lock(conn); conn->hcon = hcon; + iso_conn_unlock(conn); + } + iso_conn_put(conn); return conn; } @@ -159,6 +213,7 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon) if (!conn) return NULL; + kref_init(&conn->ref); spin_lock_init(&conn->lock); INIT_DELAYED_WORK(&conn->timeout_work, iso_sock_timeout); @@ -178,17 +233,15 @@ static void iso_chan_del(struct sock *sk, int err) struct sock *parent; 
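	/*
	 * Refcount sketch for the iso_conn kref conversion above
	 * (illustrative, not part of the hunk itself): every path that can
	 * race with the final put first takes a temporary reference:
	 *
	 *	conn = iso_conn_hold_unless_zero(conn);
	 *	if (!conn)
	 *		return;		(already being freed)
	 *	...
	 *	iso_conn_put(conn);	(last put invokes iso_conn_free())
	 *
	 * iso_conn_free() clears the conn->sk and conn->hcon back-pointers
	 * before kfree(), so neither the socket nor the hci_conn is left
	 * holding a stale iso_data/conn pointer.
	 */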
conn = iso_pi(sk)->conn; + iso_pi(sk)->conn = NULL; BT_DBG("sk %p, conn %p, err %d", sk, conn, err); if (conn) { iso_conn_lock(conn); conn->sk = NULL; - iso_pi(sk)->conn = NULL; iso_conn_unlock(conn); - - if (conn->hcon) - hci_conn_drop(conn->hcon); + iso_conn_put(conn); } sk->sk_state = BT_CLOSED; @@ -210,6 +263,7 @@ static void iso_conn_del(struct hci_conn *hcon, int err) struct iso_conn *conn = hcon->iso_data; struct sock *sk; + conn = iso_conn_hold_unless_zero(conn); if (!conn) return; @@ -219,20 +273,18 @@ static void iso_conn_del(struct hci_conn *hcon, int err) iso_conn_lock(conn); sk = iso_sock_hold(conn); iso_conn_unlock(conn); + iso_conn_put(conn); - if (sk) { - lock_sock(sk); - iso_sock_clear_timer(sk); - iso_chan_del(sk, err); - release_sock(sk); - sock_put(sk); + if (!sk) { + iso_conn_put(conn); + return; } - /* Ensure no more work items will run before freeing conn. */ - cancel_delayed_work_sync(&conn->timeout_work); - - hcon->iso_data = NULL; - kfree(conn); + lock_sock(sk); + iso_sock_clear_timer(sk); + iso_chan_del(sk, err); + release_sock(sk); + sock_put(sk); } static int __iso_chan_add(struct iso_conn *conn, struct sock *sk, @@ -652,6 +704,8 @@ static void iso_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); + iso_conn_put(iso_pi(sk)->conn); + skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); } @@ -711,6 +765,7 @@ static void iso_sock_disconn(struct sock *sk) */ if (bis_sk) { hcon->state = BT_OPEN; + hcon->iso_data = NULL; iso_pi(sk)->conn->hcon = NULL; iso_sock_clear_timer(sk); iso_chan_del(sk, bt_to_errno(hcon->abort_reason)); @@ -720,7 +775,6 @@ static void iso_sock_disconn(struct sock *sk) } sk->sk_state = BT_DISCONN; - iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT); iso_conn_lock(iso_pi(sk)->conn); hci_conn_drop(iso_pi(sk)->conn->hcon); iso_pi(sk)->conn->hcon = NULL; @@ -1338,6 +1392,13 @@ static void iso_conn_big_sync(struct sock *sk) if (!hdev) return; + /* hci_le_big_create_sync requires hdev lock to be held, since + * it enqueues the HCI LE BIG Create Sync command via + * hci_cmd_sync_queue_once, which checks hdev flags that might + * change. + */ + hci_dev_lock(hdev); + if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon, &iso_pi(sk)->qos, @@ -1348,6 +1409,8 @@ static void iso_conn_big_sync(struct sock *sk) bt_dev_err(hdev, "hci_le_big_create_sync: %d", err); } + + hci_dev_unlock(hdev); } static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, @@ -1733,6 +1796,13 @@ static bool iso_match_big(struct sock *sk, void *data) return ev->handle == iso_pi(sk)->qos.bcast.big; } +static bool iso_match_big_hcon(struct sock *sk, void *data) +{ + struct hci_conn *hcon = data; + + return hcon->iso_qos.bcast.big == iso_pi(sk)->qos.bcast.big; +} + static bool iso_match_pa_sync_flag(struct sock *sk, void *data) { return test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags); @@ -1756,8 +1826,16 @@ static void iso_conn_ready(struct iso_conn *conn) if (!hcon) return; - if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags) || - test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) { + if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags)) { + /* A BIS slave hcon is notified to the ISO layer + * after the Command Complete for the LE Setup + * ISO Data Path command is received. Get the + * parent socket that matches the hcon BIG handle. 
+ */ + parent = iso_get_sock(&hcon->src, &hcon->dst, + BT_LISTEN, iso_match_big_hcon, + hcon); + } else if (test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) { ev = hci_recv_event_data(hcon->hdev, HCI_EVT_LE_BIG_SYNC_ESTABILISHED); @@ -1824,7 +1902,6 @@ static void iso_conn_ready(struct iso_conn *conn) if (!bacmp(&hcon->dst, BDADDR_ANY)) { bacpy(&hcon->dst, &iso_pi(parent)->dst); hcon->dst_type = iso_pi(parent)->dst_type; - hcon->sync_handle = iso_pi(parent)->sync_handle; } if (ev3) { @@ -1942,6 +2019,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (sk) { int err; + struct hci_conn *hcon = iso_pi(sk)->conn->hcon; iso_pi(sk)->qos.bcast.encryption = ev2->encryption; @@ -1950,7 +2028,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { - err = hci_le_big_create_sync(hdev, NULL, + err = hci_le_big_create_sync(hdev, + hcon, &iso_pi(sk)->qos, iso_pi(sk)->sync_handle, iso_pi(sk)->bc_num_bis, diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ba437c6f6ee5..18e89e764f3b 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1886,6 +1886,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, chan = l2cap_chan_create(); if (!chan) { sk_free(sk); + sock->sk = NULL; return NULL; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a429661b676a..1f6d083682b8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -132,6 +132,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_MESH_READ_FEATURES, MGMT_OP_MESH_SEND, MGMT_OP_MESH_SEND_CANCEL, + MGMT_OP_HCI_CMD_SYNC, }; static const u16 mgmt_events[] = { @@ -2515,6 +2516,64 @@ unlock: return err; } +static int send_hci_cmd_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_hci_cmd_sync *cp = cmd->param; + struct sk_buff *skb; + + skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cp->opcode), + le16_to_cpu(cp->params_len), cp->params, + cp->event, cp->timeout ? + msecs_to_jiffies(cp->timeout * 1000) : + HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) { + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, + mgmt_status(PTR_ERR(skb))); + goto done; + } + + mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, 0, + skb->data, skb->len); + + kfree_skb(skb); + +done: + mgmt_pending_free(cmd); + + return 0; +} + +static int mgmt_hci_cmd_sync(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_hci_cmd_sync *cp = data; + struct mgmt_pending_cmd *cmd; + int err; + + if (len < sizeof(*cp)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + cmd = mgmt_pending_new(sk, MGMT_OP_HCI_CMD_SYNC, hdev, data, len); + if (!cmd) + err = -ENOMEM; + else + err = hci_cmd_sync_queue(hdev, send_hci_cmd_sync, cmd, NULL); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, + MGMT_STATUS_FAILED); + + if (cmd) + mgmt_pending_free(cmd); + } + + hci_dev_unlock(hdev); + return err; +} + /* This is a helper function to test for pending mgmt commands that can * cause CoD or EIR HCI commands. 
We can only allow one such pending * mgmt command at a time since otherwise we cannot easily track what @@ -9371,6 +9430,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { mesh_send, MGMT_MESH_SEND_SIZE, HCI_MGMT_VAR_LEN }, { mesh_send_cancel, MGMT_MESH_SEND_CANCEL_SIZE }, + { mgmt_hci_cmd_sync, MGMT_HCI_CMD_SYNC_SIZE, HCI_MGMT_VAR_LEN }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f48250e3f2e1..40766f8119ed 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -274,13 +274,13 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, struct rfcomm_dlc *d; struct sock *sk; - sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern); - if (!sk) + d = rfcomm_dlc_alloc(prio); + if (!d) return NULL; - d = rfcomm_dlc_alloc(prio); - if (!d) { - sk_free(sk); + sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern); + if (!sk) { + rfcomm_dlc_free(d); return NULL; } @@ -729,7 +729,8 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u struct sock *l2cap_sk; struct l2cap_conn *conn; struct rfcomm_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; u32 opt; BT_DBG("sk %p", sk); @@ -783,7 +784,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; @@ -802,7 +803,8 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c { struct sock *sk = sock->sk; struct bt_security sec; - int len, err = 0; + int err = 0; + size_t len; BT_DBG("sk %p", sk); @@ -827,7 +829,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c sec.level = rfcomm_pi(sk)->sec_level; sec.key_size = 0; - len = min_t(unsigned int, len, sizeof(sec)); + len = min(len, sizeof(sec)); if (copy_to_user(optval, (char *) &sec, len)) err = -EFAULT; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 1c7252a36866..1b8e468d24cf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -51,6 +51,7 @@ struct sco_conn { struct delayed_work timeout_work; unsigned int mtu; + struct kref ref; }; #define sco_conn_lock(c) spin_lock(&c->lock) @@ -76,6 +77,49 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) +static void sco_conn_free(struct kref *ref) +{ + struct sco_conn *conn = container_of(ref, struct sco_conn, ref); + + BT_DBG("conn %p", conn); + + if (conn->sk) + sco_pi(conn->sk)->conn = NULL; + + if (conn->hcon) { + conn->hcon->sco_data = NULL; + hci_conn_drop(conn->hcon); + } + + /* Ensure no more work items will run since hci_conn has been dropped */ + disable_delayed_work_sync(&conn->timeout_work); + + kfree(conn); +} + +static void sco_conn_put(struct sco_conn *conn) +{ + if (!conn) + return; + + BT_DBG("conn %p refcnt %d", conn, kref_read(&conn->ref)); + + kref_put(&conn->ref, sco_conn_free); +} + +static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn) +{ + if (!conn) + return NULL; + + BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref)); + + if (!kref_get_unless_zero(&conn->ref)) + return NULL; + + return conn; +} + static struct sock *sco_sock_hold(struct sco_conn *conn) { if (!conn || !bt_sock_linked(&sco_sk_list, conn->sk)) @@ -92,6 +136,10 @@ static void 
sco_sock_timeout(struct work_struct *work) timeout_work.work); struct sock *sk; + conn = sco_conn_hold_unless_zero(conn); + if (!conn) + return; + sco_conn_lock(conn); if (!conn->hcon) { sco_conn_unlock(conn); @@ -99,6 +147,7 @@ static void sco_sock_timeout(struct work_struct *work) } sk = sco_sock_hold(conn); sco_conn_unlock(conn); + sco_conn_put(conn); if (!sk) return; @@ -136,9 +185,14 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) { struct sco_conn *conn = hcon->sco_data; + conn = sco_conn_hold_unless_zero(conn); if (conn) { - if (!conn->hcon) + if (!conn->hcon) { + sco_conn_lock(conn); conn->hcon = hcon; + sco_conn_unlock(conn); + } + sco_conn_put(conn); return conn; } @@ -146,6 +200,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) if (!conn) return NULL; + kref_init(&conn->ref); spin_lock_init(&conn->lock); INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); @@ -170,17 +225,15 @@ static void sco_chan_del(struct sock *sk, int err) struct sco_conn *conn; conn = sco_pi(sk)->conn; + sco_pi(sk)->conn = NULL; BT_DBG("sk %p, conn %p, err %d", sk, conn, err); if (conn) { sco_conn_lock(conn); conn->sk = NULL; - sco_pi(sk)->conn = NULL; sco_conn_unlock(conn); - - if (conn->hcon) - hci_conn_drop(conn->hcon); + sco_conn_put(conn); } sk->sk_state = BT_CLOSED; @@ -195,29 +248,28 @@ static void sco_conn_del(struct hci_conn *hcon, int err) struct sco_conn *conn = hcon->sco_data; struct sock *sk; + conn = sco_conn_hold_unless_zero(conn); if (!conn) return; BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); - /* Kill socket */ sco_conn_lock(conn); sk = sco_sock_hold(conn); sco_conn_unlock(conn); + sco_conn_put(conn); - if (sk) { - lock_sock(sk); - sco_sock_clear_timer(sk); - sco_chan_del(sk, err); - release_sock(sk); - sock_put(sk); + if (!sk) { + sco_conn_put(conn); + return; } - /* Ensure no more work items will run before freeing conn. 
*/ - cancel_delayed_work_sync(&conn->timeout_work); - - hcon->sco_data = NULL; - kfree(conn); + /* Kill socket */ + lock_sock(sk); + sco_sock_clear_timer(sk); + sco_chan_del(sk, err); + release_sock(sk); + sock_put(sk); } static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, @@ -401,6 +453,8 @@ static void sco_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); + sco_conn_put(sco_pi(sk)->conn); + skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); } @@ -448,17 +502,6 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: - if (sco_pi(sk)->conn->hcon) { - sk->sk_state = BT_DISCONN; - sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); - sco_conn_lock(sco_pi(sk)->conn); - hci_conn_drop(sco_pi(sk)->conn->hcon); - sco_pi(sk)->conn->hcon = NULL; - sco_conn_unlock(sco_pi(sk)->conn); - } else - sco_chan_del(sk, ECONNRESET); - break; - case BT_CONNECT2: case BT_CONNECT: case BT_DISCONN: diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 26b79feb385d..0ab4613aa07a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -328,7 +328,7 @@ int br_netpoll_enable(struct net_bridge_port *p) return __br_netpoll_enable(p); } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int br_netpoll_setup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 642b8ccaae8e..82bac2426631 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -73,13 +73,6 @@ static inline int has_expired(const struct net_bridge *br, time_before_eq(fdb->updated + hold_time(br), jiffies); } -static void fdb_rcu_free(struct rcu_head *head) -{ - struct net_bridge_fdb_entry *ent - = container_of(head, struct net_bridge_fdb_entry, rcu); - kmem_cache_free(br_fdb_cache, ent); -} - static int fdb_to_nud(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { @@ -329,7 +322,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f, if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags)) atomic_dec(&br->fdb_n_learned); fdb_notify(br, f, RTM_DELNEIGH, swdev_notify); - call_rcu(&f->rcu, fdb_rcu_free); + kfree_rcu(f, rcu); } /* Delete a local entry if no other port had the same address. @@ -1159,7 +1152,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], - struct netlink_ext_ack *extack) + bool *notified, struct netlink_ext_ack *extack) { int err = 0; @@ -1190,6 +1183,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } + if (!err) + *notified = true; return err; } @@ -1202,7 +1197,7 @@ static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = { int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, - struct netlink_ext_ack *extack) + bool *notified, struct netlink_ext_ack *extack) { struct nlattr *nfea_tb[NFEA_MAX + 1], *attr; struct net_bridge_vlan_group *vg; @@ -1265,10 +1260,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. 
*/ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, - extack); + notified, extack); } else { err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, - extack); + notified, extack); if (err || !vg || !vg->num_vlans) goto out; @@ -1280,7 +1275,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, - nfea_tb, extack); + nfea_tb, notified, extack); if (err) goto out; } @@ -1292,7 +1287,7 @@ out: static int fdb_delete_by_addr_and_port(struct net_bridge *br, const struct net_bridge_port *p, - const u8 *addr, u16 vlan) + const u8 *addr, u16 vlan, bool *notified) { struct net_bridge_fdb_entry *fdb; @@ -1301,18 +1296,19 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br, return -ENOENT; fdb_delete(br, fdb, true); + *notified = true; return 0; } static int __br_fdb_delete(struct net_bridge *br, const struct net_bridge_port *p, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, bool *notified) { int err; spin_lock_bh(&br->hash_lock); - err = fdb_delete_by_addr_and_port(br, p, addr, vid); + err = fdb_delete_by_addr_and_port(br, p, addr, vid, notified); spin_unlock_bh(&br->hash_lock); return err; @@ -1321,12 +1317,11 @@ static int __br_fdb_delete(struct net_bridge *br, /* Remove neighbor entry with RTM_DELNEIGH */ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool *notified, struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; - struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -1345,23 +1340,19 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], } if (vid) { - v = br_vlan_find(vg, vid); - if (!v) { - pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name); - return -EINVAL; - } - - err = __br_fdb_delete(br, p, addr, vid); + err = __br_fdb_delete(br, p, addr, vid, notified); } else { + struct net_bridge_vlan *v; + err = -ENOENT; - err &= __br_fdb_delete(br, p, addr, 0); + err &= __br_fdb_delete(br, p, addr, 0, notified); if (!vg || !vg->num_vlans) return err; list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - err &= __br_fdb_delete(br, p, addr, v->vid); + err &= __br_fdb_delete(br, p, addr, v->vid, notified); } } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 1d458e9da660..451e45b9a6a5 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -370,11 +370,11 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, */ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb->dev, *br_indev; - struct iphdr *iph = ip_hdr(skb); struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + struct net_device *dev = skb->dev, *br_indev; + const struct iphdr *iph = ip_hdr(skb); + enum skb_drop_reason reason; struct rtable *rt; - int err; br_indev = nf_bridge_get_physindev(skb, net); if (!br_indev) { @@ -390,7 +390,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ } nf_bridge->in_prerouting = 0; if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { - if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { + reason = ip_route_input(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev); + if (reason) { struct in_device *in_dev = 
__in_dev_get_rcu(dev); /* If err equals -EHOSTUNREACH the error is due to a @@ -400,11 +402,12 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ * martian destinations: loopback destinations and destination * 0.0.0.0. In both cases the packet will be dropped because the * destination is the loopback device and not the bridge. */ - if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) + if (reason != SKB_DROP_REASON_IP_INADDRERRORS || !in_dev || + IN_DEV_FORWARD(in_dev)) goto free_skb; rt = ip_route_output(net, iph->daddr, 0, - iph->tos & INET_DSCP_MASK, 0, + ip4h_dscp(iph), 0, RT_SCOPE_UNIVERSE); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6b97ae47f855..3e0f47203f2a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1924,7 +1924,9 @@ int __init br_netlink_init(void) if (err) goto out; - rtnl_af_register(&br_af_ops); + err = rtnl_af_register(&br_af_ops); + if (err) + goto out_vlan; err = rtnl_link_register(&br_link_ops); if (err) @@ -1934,6 +1936,8 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); +out_vlan: + br_vlan_rtnl_uninit(); out: return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 041f6e571a20..9853cfbb9d14 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -853,12 +853,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - struct netlink_ext_ack *extack); + bool *notified, struct netlink_ext_ack *extack); int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, - struct netlink_ext_ack *extack); + bool *notified, struct netlink_ext_ack *extack); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 104c0125e32e..f16bbbbb9481 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -41,7 +41,13 @@ config NF_CONNTRACK_BRIDGE # old sockopt interface and eval loop config BRIDGE_NF_EBTABLES_LEGACY - tristate + tristate "Legacy EBTABLES support" + depends on BRIDGE && NETFILTER_XTABLES + default n + help + Legacy ebtables packet/frame classifier. + This is not needed if you are using ebtables over nftables + (iptables-nft). menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index d12a221366d6..5adced1e7d0c 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -63,7 +63,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, return nft_meta_get_eval(expr, regs, pkt); } - strncpy((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ); + strscpy_pad((char *)dest, br_dev ? 
br_dev->name : "", IFNAMSIZ); return; err: regs->verdict.code = NFT_BREAK; diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 9cef9496a707..171fa32ada85 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -183,12 +183,6 @@ bool cfsrvl_ready(struct cfsrvl *service, int *err) return true; } -u8 cfsrvl_getphyid(struct cflayer *layer) -{ - struct cfsrvl *servl = container_obj(layer); - return servl->dev_info.id; -} - bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) { struct cfsrvl *servl = container_obj(layer); diff --git a/net/can/af_can.c b/net/can/af_can.c index 707576eeeb58..01f3fbb3b67d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -171,6 +171,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, /* release sk on errors */ sock_orphan(sk); sock_put(sk); + sock->sk = NULL; } errout: diff --git a/net/can/gw.c b/net/can/gw.c index 37528826935e..ef93293c1fae 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1265,6 +1265,15 @@ static struct pernet_operations cangw_pernet_ops = { .exit_batch = cangw_pernet_exit_batch, }; +static const struct rtnl_msg_handler cgw_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_NEWROUTE, + .doit = cgw_create_job}, + {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_DELROUTE, + .doit = cgw_remove_job}, + {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_GETROUTE, + .dumpit = cgw_dump_jobs}, +}; + static __init int cgw_module_init(void) { int ret; @@ -1290,27 +1299,13 @@ static __init int cgw_module_init(void) if (ret) goto out_register_notifier; - ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE, - NULL, cgw_dump_jobs, 0); - if (ret) - goto out_rtnl_register1; - - ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, - cgw_create_job, NULL, 0); - if (ret) - goto out_rtnl_register2; - ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, - cgw_remove_job, NULL, 0); + ret = rtnl_register_many(cgw_rtnl_msg_handlers); if (ret) - goto out_rtnl_register3; + goto out_rtnl_register; return 0; -out_rtnl_register3: - rtnl_unregister(PF_CAN, RTM_NEWROUTE); -out_rtnl_register2: - rtnl_unregister(PF_CAN, RTM_GETROUTE); -out_rtnl_register1: +out_rtnl_register: unregister_netdevice_notifier(¬ifier); out_register_notifier: kmem_cache_destroy(cgw_cache); diff --git a/net/can/raw.c b/net/can/raw.c index 00533f64d69d..255c0a8f39d6 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -966,7 +966,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; - skb_setup_tx_timestamp(skb, sockc.tsflags); + skb_setup_tx_timestamp(skb, &sockc); err = can_send(skb, ro->loopback); diff --git a/net/core/Makefile b/net/core/Makefile index c3ebbaf9c81e..d9326600e289 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -45,3 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o obj-$(CONFIG_NET_TEST) += net_test.o obj-$(CONFIG_NET_DEVMEM) += devmem.o +obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o +obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o diff --git a/net/core/dev.c b/net/core/dev.c index 8453e14d301b..13d00fc10f55 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2949,6 +2949,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); + net_shaper_set_real_num_tx_queues(dev, txq); + dev_qdisc_change_real_num_tx(dev, txq); 
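	/*
	 * Ordering sketch (assumption drawn from the hunk above): both the
	 * new shaper hook and the qdisc update run while
	 * dev->real_num_tx_queues still holds the old value:
	 *
	 *	net_shaper_set_real_num_tx_queues(dev, txq);
	 *	dev_qdisc_change_real_num_tx(dev, txq);
	 *	dev->real_num_tx_queues = txq;
	 *
	 * so the NET_SHAPER core can release per-queue shapers on queues
	 * that are about to go away before the count shrinks.
	 */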
dev->real_num_tx_queues = txq; @@ -6234,12 +6236,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) { if (n->gro_bitmask) - timeout = READ_ONCE(n->dev->gro_flush_timeout); - n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs); + timeout = napi_get_gro_flush_timeout(n); + n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n); } if (n->defer_hard_irqs_count > 0) { n->defer_hard_irqs_count--; - timeout = READ_ONCE(n->dev->gro_flush_timeout); + timeout = napi_get_gro_flush_timeout(n); if (timeout) ret = false; } @@ -6373,8 +6375,8 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (flags & NAPI_F_PREFER_BUSY_POLL) { - napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); - timeout = READ_ONCE(napi->dev->gro_flush_timeout); + napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); + timeout = napi_get_gro_flush_timeout(napi); if (napi->defer_hard_irqs_count && timeout) { hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); skip_schedule = true; @@ -6505,8 +6507,62 @@ void napi_busy_loop(unsigned int napi_id, } EXPORT_SYMBOL(napi_busy_loop); +void napi_suspend_irqs(unsigned int napi_id) +{ + struct napi_struct *napi; + + rcu_read_lock(); + napi = napi_by_id(napi_id); + if (napi) { + unsigned long timeout = napi_get_irq_suspend_timeout(napi); + + if (timeout) + hrtimer_start(&napi->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + } + rcu_read_unlock(); +} + +void napi_resume_irqs(unsigned int napi_id) +{ + struct napi_struct *napi; + + rcu_read_lock(); + napi = napi_by_id(napi_id); + if (napi) { + /* If irq_suspend_timeout is set to 0 between the call to + * napi_suspend_irqs and now, the original value still + * determines the safety timeout as intended and napi_watchdog + * will resume irq processing. + */ + if (napi_get_irq_suspend_timeout(napi)) { + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); + } + } + rcu_read_unlock(); +} + #endif /* CONFIG_NET_RX_BUSY_POLL */ +static void __napi_hash_add_with_id(struct napi_struct *napi, + unsigned int napi_id) +{ + napi->napi_id = napi_id; + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); +} + +static void napi_hash_add_with_id(struct napi_struct *napi, + unsigned int napi_id) +{ + spin_lock(&napi_hash_lock); + WARN_ON_ONCE(napi_by_id(napi_id)); + __napi_hash_add_with_id(napi, napi_id); + spin_unlock(&napi_hash_lock); +} + static void napi_hash_add(struct napi_struct *napi) { if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state)) @@ -6519,10 +6575,8 @@ static void napi_hash_add(struct napi_struct *napi) if (unlikely(++napi_gen_id < MIN_NAPI_ID)) napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); - napi->napi_id = napi_gen_id; - hlist_add_head_rcu(&napi->napi_hash_node, - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + __napi_hash_add_with_id(napi, napi_gen_id); spin_unlock(&napi_hash_lock); } @@ -6645,6 +6699,30 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, } EXPORT_SYMBOL(netif_queue_set_napi); +static void napi_restore_config(struct napi_struct *n) +{ + n->defer_hard_irqs = n->config->defer_hard_irqs; + n->gro_flush_timeout = n->config->gro_flush_timeout; + n->irq_suspend_timeout = n->config->irq_suspend_timeout; + /* a NAPI ID might be stored in the config, if so use it. if not, use + * napi_hash_add to generate one for us. 
It will be saved to the config + * in napi_disable. + */ + if (n->config->napi_id) + napi_hash_add_with_id(n, n->config->napi_id); + else + napi_hash_add(n); +} + +static void napi_save_config(struct napi_struct *n) +{ + n->config->defer_hard_irqs = n->defer_hard_irqs; + n->config->gro_flush_timeout = n->gro_flush_timeout; + n->config->irq_suspend_timeout = n->irq_suspend_timeout; + n->config->napi_id = n->napi_id; + napi_hash_del(n); +} + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6672,7 +6750,13 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); - napi_hash_add(napi); + + /* default settings from sysfs are applied to all NAPIs. any per-NAPI + * configuration will be loaded in napi_enable + */ + napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs)); + napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout)); + napi_get_frags_check(napi); /* Create kthread for this napi if dev->threaded is set. * Clear dev->threaded if kthread creation failed so that @@ -6704,6 +6788,11 @@ void napi_disable(struct napi_struct *n) hrtimer_cancel(&n->timer); + if (n->config) + napi_save_config(n); + else + napi_hash_del(n); + clear_bit(NAPI_STATE_DISABLE, &n->state); } EXPORT_SYMBOL(napi_disable); @@ -6719,6 +6808,11 @@ void napi_enable(struct napi_struct *n) { unsigned long new, val = READ_ONCE(n->state); + if (n->config) + napi_restore_config(n); + else + napi_hash_add(n); + do { BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); @@ -6748,7 +6842,11 @@ void __netif_napi_del(struct napi_struct *napi) if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; - napi_hash_del(napi); + if (napi->config) { + napi->index = -1; + napi->config = NULL; + } + list_del_rcu(&napi->dev_list); napi_free_frags(napi); @@ -11060,8 +11158,8 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) WARN_ON(dev->reg_state == NETREG_REGISTERED); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { - dev->gro_flush_timeout = 20000; - dev->napi_defer_hard_irqs = 1; + netdev_set_gro_flush_timeout(dev, 20000); + netdev_set_defer_hard_irqs(dev, 1); } } EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); @@ -11085,6 +11183,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; + size_t napi_config_sz; + unsigned int maxqs; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -11098,6 +11198,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } + maxqs = max(txqs, rxqs); + dev = kvzalloc(struct_size(dev, priv, sizeof_priv), GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!dev) @@ -11151,6 +11253,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, hash_init(dev->qdisc_hash); #endif + mutex_init(&dev->lock); + dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11172,6 +11276,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool) goto free_all; + napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); + dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); + if (!dev->napi_config) + goto free_all; + strscpy(dev->name, name); dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; @@ -11221,6 +11330,8 @@ void free_netdev(struct 
net_device *dev) return; } + mutex_destroy(&dev->lock); + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -11233,6 +11344,8 @@ void free_netdev(struct net_device *dev) list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); + kvfree(dev->napi_config); + ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); @@ -11430,6 +11543,8 @@ void unregister_netdevice_many_notify(struct list_head *head, mutex_destroy(&dev->ethtool->rss_lock); + net_shaper_flush_netdev(dev); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); @@ -11998,8 +12113,6 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); - CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); - CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler); @@ -12011,7 +12124,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92); } /* diff --git a/net/core/dev.h b/net/core/dev.h index 5654325c5b71..d043dee25a68 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -35,6 +35,16 @@ void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); +#if IS_ENABLED(CONFIG_NET_SHAPER) +void net_shaper_flush_netdev(struct net_device *dev); +void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq); +#else +static inline void net_shaper_flush_netdev(struct net_device *dev) {} +static inline void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq) {} +#endif + /* sysctls not referred to from outside net/core/ */ extern int netdev_unregister_timeout_secs; extern int weight_p; @@ -138,6 +148,119 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, WRITE_ONCE(dev->gro_ipv4_max_size, size); } +/** + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs + * @n: napi struct to get the defer_hard_irqs field from + * + * Return: the per-NAPI value of the defer_hard_irqs field.
+ */ +static inline u32 napi_get_defer_hard_irqs(const struct napi_struct *n) +{ + return READ_ONCE(n->defer_hard_irqs); +} + +/** + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi + * @n: napi_struct to set the defer_hard_irqs field + * @defer: the value the field should be set to + */ +static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer) +{ + WRITE_ONCE(n->defer_hard_irqs, defer); +} + +/** + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev + * @netdev: the net_device for which all NAPIs will have defer_hard_irqs set + * @defer: the defer_hard_irqs value to set + */ +static inline void netdev_set_defer_hard_irqs(struct net_device *netdev, + u32 defer) +{ + unsigned int count = max(netdev->num_rx_queues, + netdev->num_tx_queues); + struct napi_struct *napi; + int i; + + WRITE_ONCE(netdev->napi_defer_hard_irqs, defer); + list_for_each_entry(napi, &netdev->napi_list, dev_list) + napi_set_defer_hard_irqs(napi, defer); + + for (i = 0; i < count; i++) + netdev->napi_config[i].defer_hard_irqs = defer; +} + +/** + * napi_get_gro_flush_timeout - get the gro_flush_timeout + * @n: napi struct to get the gro_flush_timeout from + * + * Return: the per-NAPI value of the gro_flush_timeout field. + */ +static inline unsigned long +napi_get_gro_flush_timeout(const struct napi_struct *n) +{ + return READ_ONCE(n->gro_flush_timeout); +} + +/** + * napi_set_gro_flush_timeout - set the gro_flush_timeout for a napi + * @n: napi struct to set the gro_flush_timeout + * @timeout: timeout value to set + * + * napi_set_gro_flush_timeout sets the per-NAPI gro_flush_timeout + */ +static inline void napi_set_gro_flush_timeout(struct napi_struct *n, + unsigned long timeout) +{ + WRITE_ONCE(n->gro_flush_timeout, timeout); +} + +/** + * netdev_set_gro_flush_timeout - set gro_flush_timeout of a netdev's NAPIs + * @netdev: the net_device for which all NAPIs will have gro_flush_timeout set + * @timeout: the timeout value to set + */ +static inline void netdev_set_gro_flush_timeout(struct net_device *netdev, + unsigned long timeout) +{ + unsigned int count = max(netdev->num_rx_queues, + netdev->num_tx_queues); + struct napi_struct *napi; + int i; + + WRITE_ONCE(netdev->gro_flush_timeout, timeout); + list_for_each_entry(napi, &netdev->napi_list, dev_list) + napi_set_gro_flush_timeout(napi, timeout); + + for (i = 0; i < count; i++) + netdev->napi_config[i].gro_flush_timeout = timeout; +} + +/** + * napi_get_irq_suspend_timeout - get the irq_suspend_timeout + * @n: napi struct to get the irq_suspend_timeout from + * + * Return: the per-NAPI value of the irq_suspend_timeout field. 
+ */ +static inline unsigned long +napi_get_irq_suspend_timeout(const struct napi_struct *n) +{ + return READ_ONCE(n->irq_suspend_timeout); +} + +/** + * napi_set_irq_suspend_timeout - set the irq_suspend_timeout for a napi + * @n: napi struct to set the irq_suspend_timeout + * @timeout: timeout value to set + * + * napi_set_irq_suspend_timeout sets the per-NAPI irq_suspend_timeout + */ +static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, + unsigned long timeout) +{ + WRITE_ONCE(n->irq_suspend_timeout, timeout); +} + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 473c437b6b53..46d43b950471 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -64,7 +64,7 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc) } /* Loop over the interfaces, and write an info block for each. */ - rtnl_lock(); + rtnl_net_lock(net); for_each_netdev(net, dev) { if (!pos) done = inet_gifconf(dev, NULL, 0, size); @@ -72,12 +72,12 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc) done = inet_gifconf(dev, pos + total, len - total, size); if (done < 0) { - rtnl_unlock(); + rtnl_net_unlock(net); return -EFAULT; } total += done; } - rtnl_unlock(); + rtnl_net_unlock(net); return put_user(total, &uifc->ifc_len); } diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index fc96259807b6..5cdca49b1d7c 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -43,7 +43,6 @@ static unsigned int fib_seq_sum(struct net *net) struct fib_notifier_ops *ops; unsigned int fib_seq = 0; - rtnl_lock(); rcu_read_lock(); list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { if (!try_module_get(ops->owner)) @@ -52,7 +51,6 @@ static unsigned int fib_seq_sum(struct net *net) module_put(ops->owner); } rcu_read_unlock(); - rtnl_unlock(); return fib_seq; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 154a2681f55c..34185d138c95 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -101,7 +101,8 @@ static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); -static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) +static struct fib_rules_ops *lookup_rules_ops(const struct net *net, + int family) { struct fib_rules_ops *ops; @@ -370,7 +371,9 @@ static int call_fib_rule_notifiers(struct net *net, .rule = rule, }; - ops->fib_rules_seq++; + ASSERT_RTNL(); + /* Paired with READ_ONCE() in fib_rules_seq() */ + WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1); return call_fib_notifiers(net, event_type, &info.info); } @@ -397,17 +400,16 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, } EXPORT_SYMBOL_GPL(fib_rules_dump); -unsigned int fib_rules_seq_read(struct net *net, int family) +unsigned int fib_rules_seq_read(const struct net *net, int family) { unsigned int fib_rules_seq; struct fib_rules_ops *ops; - ASSERT_RTNL(); - ops = lookup_rules_ops(net, family); if (!ops) return 0; - fib_rules_seq = ops->fib_rules_seq; + /* Paired with WRITE_ONCE() in call_fib_rule_notifiers() */ + fib_rules_seq = READ_ONCE(ops->fib_rules_seq); rules_ops_put(ops); return fib_rules_seq; @@ -556,8 +558,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, nlrule->pref = fib_default_rule_pref(ops); } - nlrule->proto = tb[FRA_PROTOCOL] ? 
- nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC; + nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC); if (tb[FRA_IIFNAME]) { struct net_device *dev; @@ -1289,13 +1290,18 @@ static struct pernet_operations fib_rules_net_ops = { .exit = fib_rules_net_exit, }; +static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule}, + {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule}, + {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, +}; + static int __init fib_rules_init(void) { int err; - rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, - RTNL_FLAG_DUMP_UNLOCKED); + + rtnl_register_many(fib_rules_rtnl_msg_handlers); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) @@ -1310,9 +1316,7 @@ static int __init fib_rules_init(void) fail_unregister: unregister_pernet_subsys(&fib_rules_net_ops); fail: - rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); - rtnl_unregister(PF_UNSPEC, RTM_DELRULE); - rtnl_unregister(PF_UNSPEC, RTM_GETRULE); + rtnl_unregister_many(fib_rules_rtnl_msg_handlers); return err; } diff --git a/net/core/filter.c b/net/core/filter.c index fb56567c551e..6625b3f563a4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1654,18 +1654,6 @@ void sk_reuseport_prog_free(struct bpf_prog *prog) bpf_prog_destroy(prog); } -struct bpf_scratchpad { - union { - __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; - u8 buff[MAX_BPF_STACK]; - }; - local_lock_t bh_lock; -}; - -static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp) = { - .bh_lock = INIT_LOCAL_LOCK(bh_lock), -}; - static inline int __bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { @@ -2022,11 +2010,6 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = { BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, __be32 *, to, u32, to_size, __wsum, seed) { - struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); - u32 diff_size = from_size + to_size; - int i, j = 0; - __wsum ret; - /* This is quite flexible, some examples: * * from_size == 0, to_size > 0, seed := csum --> pushing data @@ -2035,19 +2018,19 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, * * Even for diffing, from_size and to_size don't need to be equal. 
*/ - if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) || - diff_size > sizeof(sp->diff))) - return -EINVAL; - local_lock_nested_bh(&bpf_sp.bh_lock); - for (i = 0; i < from_size / sizeof(__be32); i++, j++) - sp->diff[j] = ~from[i]; - for (i = 0; i < to_size / sizeof(__be32); i++, j++) - sp->diff[j] = to[i]; + __wsum ret = seed; - ret = csum_partial(sp->diff, diff_size, seed); - local_unlock_nested_bh(&bpf_sp.bh_lock); - return ret; + if (from_size && to_size) + ret = csum_sub(csum_partial(to, to_size, ret), + csum_partial(from, from_size, 0)); + else if (to_size) + ret = csum_partial(to, to_size, ret); + + else if (from_size) + ret = ~csum_partial(from, from_size, ~ret); + + return csum_from32to16((__force unsigned int)ret); } static const struct bpf_func_proto bpf_csum_diff_proto = { @@ -2372,7 +2355,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4 = { .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, - .flowi4_tos = ip4h->tos & INET_DSCP_MASK, + .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)), .flowi4_oif = dev->ifindex, .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, @@ -2621,18 +2604,16 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) static void sk_msg_reset_curr(struct sk_msg *msg) { - u32 i = msg->sg.start; - u32 len = 0; - - do { - len += sk_msg_elem(msg, i)->length; - sk_msg_iter_var_next(i); - if (len >= msg->sg.size) - break; - } while (i != msg->sg.end); + if (!msg->sg.size) { + msg->sg.curr = msg->sg.start; + msg->sg.copybreak = 0; + } else { + u32 i = msg->sg.end; - msg->sg.curr = i; - msg->sg.copybreak = 0; + sk_msg_iter_var_prev(i); + msg->sg.curr = i; + msg->sg.copybreak = msg->sg.data[i].length; + } } static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { @@ -2795,7 +2776,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, sk_msg_iter_var_next(i); } while (i != msg->sg.end); - if (start >= offset + l) + if (start > offset + l) return -EINVAL; space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); @@ -2820,6 +2801,8 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, raw = page_address(page); + if (i == msg->sg.end) + sk_msg_iter_var_prev(i); psge = sk_msg_elem(msg, i); front = start - offset; back = psge->length - front; @@ -2836,7 +2819,13 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, } put_page(sg_page(psge)); - } else if (start - offset) { + new = i; + goto place_new; + } + + if (start - offset) { + if (i == msg->sg.end) + sk_msg_iter_var_prev(i); psge = sk_msg_elem(msg, i); rsge = sk_msg_elem_cpy(msg, i); @@ -2847,39 +2836,44 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, sk_msg_iter_var_next(i); sg_unmark_end(psge); sg_unmark_end(&rsge); - sk_msg_iter_next(msg, end); } /* Slot(s) to place newly allocated data */ + sk_msg_iter_next(msg, end); new = i; + sk_msg_iter_var_next(i); + + if (i == msg->sg.end) { + if (!rsge.length) + goto place_new; + sk_msg_iter_next(msg, end); + goto place_new; + } /* Shift one or two slots as needed */ - if (!copy) { - sge = sk_msg_elem_cpy(msg, i); + sge = sk_msg_elem_cpy(msg, new); + sg_unmark_end(&sge); + nsge = sk_msg_elem_cpy(msg, i); + if (rsge.length) { sk_msg_iter_var_next(i); - sg_unmark_end(&sge); + nnsge = sk_msg_elem_cpy(msg, i); sk_msg_iter_next(msg, end); + } - nsge = sk_msg_elem_cpy(msg, i); + while (i != msg->sg.end) { + msg->sg.data[i] = sge; + sge = nsge; + sk_msg_iter_var_next(i); if (rsge.length) { - sk_msg_iter_var_next(i); + nsge = nnsge; 
nnsge = sk_msg_elem_cpy(msg, i); - } - - while (i != msg->sg.end) { - msg->sg.data[i] = sge; - sge = nsge; - sk_msg_iter_var_next(i); - if (rsge.length) { - nsge = nnsge; - nnsge = sk_msg_elem_cpy(msg, i); - } else { - nsge = sk_msg_elem_cpy(msg, i); - } + } else { + nsge = sk_msg_elem_cpy(msg, i); } } +place_new: /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; @@ -2908,8 +2902,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = { static void sk_msg_shift_left(struct sk_msg *msg, int i) { + struct scatterlist *sge = sk_msg_elem(msg, i); int prev; + put_page(sg_page(sge)); do { prev = i; sk_msg_iter_var_next(i); @@ -2946,6 +2942,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL; + if (unlikely(len == 0)) + return 0; + /* First find the starting scatterlist element */ i = msg->sg.start; do { @@ -2958,7 +2957,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, } while (i != msg->sg.end); /* Bounds checks: start and pop must be inside message */ - if (start >= offset + l || last >= msg->sg.size) + if (start >= offset + l || last > msg->sg.size) return -EINVAL; space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); @@ -2987,12 +2986,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, */ if (start != offset) { struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); - int a = start; + int a = start - offset; int b = sge->length - pop - a; sk_msg_iter_var_next(i); - if (pop < sge->length - a) { + if (b > 0) { if (space) { sge->length = a; sk_msg_shift_right(msg, i); @@ -3011,7 +3010,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(!page)) return -ENOMEM; - sge->length = a; orig = sg_page(sge); from = sg_virt(sge); to = page_address(page); @@ -3021,7 +3019,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, put_page(orig); } pop = 0; - } else if (pop >= sge->length - a) { + } else { pop -= (sge->length - a); sge->length = a; } @@ -3055,7 +3053,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, pop -= sge->length; sk_msg_shift_left(msg, i); } - sk_msg_iter_var_next(i); } sk_mem_uncharge(msg->sk, len - pop); @@ -5140,6 +5137,17 @@ static u64 __bpf_get_netns_cookie(struct sock *sk) return net->net_cookie; } +BPF_CALL_1(bpf_get_netns_cookie, struct sk_buff *, skb) +{ + return __bpf_get_netns_cookie(skb && skb->sk ? skb->sk : NULL); +} + +static const struct bpf_func_proto bpf_get_netns_cookie_proto = { + .func = bpf_get_netns_cookie, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX_OR_NULL, +}; + BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx) { return __bpf_get_netns_cookie(ctx); @@ -6768,8 +6776,6 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. */ - if (!sk_fullsock(sk2)) - sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); /* Ensure there is no need to bump sk2 refcnt */ @@ -6816,8 +6822,6 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk * sock refcnt is decremented to prevent a request_sock leak. 
*/ - if (!sk_fullsock(sk2)) - sk2 = NULL; if (sk2 != sk) { sock_gen_put(sk); /* Ensure there is no need to bump sk2 refcnt */ @@ -7266,7 +7270,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) { sk = sk_to_full_sk(sk); - if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) + if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) return (unsigned long)sk; return (unsigned long)NULL; @@ -8199,6 +8203,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_under_cgroup_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_netns_cookie: + return &bpf_get_netns_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; case BPF_FUNC_fib_lookup: @@ -10231,10 +10237,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, } \ } while (0) -#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ - SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ - S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) - static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 1a14f915b7a4..ae74634310a3 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -10,6 +10,7 @@ #include <linux/bpf.h> #include <net/lwtunnel.h> #include <net/gre.h> +#include <net/ip.h> #include <net/ip6_route.h> #include <net/ipv6_stubs.h> #include <net/inet_dscp.h> @@ -87,16 +88,18 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, static int bpf_lwt_input_reroute(struct sk_buff *skb) { + enum skb_drop_reason reason; int err = -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { struct net_device *dev = skb_dst(skb)->dev; - struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); dev_hold(dev); skb_dst_drop(skb); - err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, dev); + reason = ip_route_input_noref(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev); + err = reason ? -EINVAL : 0; dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { skb_dst_drop(skb); @@ -206,7 +209,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) fl4.flowi4_oif = oif; fl4.flowi4_mark = skb->mark; fl4.flowi4_uid = sock_net_uid(net, sk); - fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = iph->protocol; fl4.daddr = iph->daddr; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 77b819cd995b..89656d180bc6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -14,7 +14,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> -#include <linux/kmemleak.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> @@ -61,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, static const struct seq_operations neigh_stat_seq_ops; #endif +static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family) +{ + int i; + + switch (family) { + default: + DEBUG_NET_WARN_ON_ONCE(1); + fallthrough; /* to avoid panic by null-ptr-deref */ + case AF_INET: + i = NEIGH_ARP_TABLE; + break; + case AF_INET6: + i = NEIGH_ND_TABLE; + break; + } + + return &dev->neighbours[i]; +} + /* Neighbour hash table buckets are protected with rwlock tbl->lock. 
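Together with the hunks that follow, the neigh_get_dev_table() helper above puts every neighbour on two hlists at once: its hash bucket and a per-device chain, so neigh_flush_dev() walks only the chain of the device being torn down instead of scanning every bucket. A condensed sketch of that double-linking discipline, with an illustrative demo_entry type standing in for struct neighbour:

#include <linux/list.h>
#include <linux/rculist.h>

struct demo_entry {
	struct hlist_node hash;		/* chains off a hash bucket */
	struct hlist_node dev_list;	/* chains off its device */
};

static void demo_flush_dev(struct hlist_head *dev_head)
{
	struct hlist_node *tmp;
	struct demo_entry *e;

	/* the _safe iterator tolerates unlinking the current entry */
	hlist_for_each_entry_safe(e, tmp, dev_head, dev_list) {
		hlist_del_rcu(&e->hash);	/* leave the hash bucket */
		hlist_del_rcu(&e->dev_list);	/* leave the device chain */
		/* the real code defers freeing to an RCU grace period */
	}
}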
@@ -205,18 +223,14 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, } } -static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, - struct neigh_table *tbl) +bool neigh_remove_one(struct neighbour *n) { bool retval = false; write_lock(&n->lock); if (refcount_read(&n->refcnt) == 1) { - struct neighbour *neigh; - - neigh = rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock)); - rcu_assign_pointer(*np, neigh); + hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); neigh_mark_dead(n); retval = true; } @@ -226,29 +240,6 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, return retval; } -bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) -{ - struct neigh_hash_table *nht; - void *pkey = ndel->primary_key; - u32 hash_val; - struct neighbour *n; - struct neighbour __rcu **np; - - nht = rcu_dereference_protected(tbl->nht, - lockdep_is_held(&tbl->lock)); - hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); - hash_val = hash_val >> (32 - nht->hash_shift); - - np = &nht->hash_buckets[hash_val]; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock)))) { - if (n == ndel) - return neigh_del(n, np, tbl); - np = &n->next; - } - return false; -} - static int neigh_forced_gc(struct neigh_table *tbl) { int max_clean = atomic_read(&tbl->gc_entries) - @@ -276,7 +267,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) remove = true; write_unlock(&n->lock); - if (remove && neigh_remove_one(n, tbl)) + if (remove && neigh_remove_one(n)) shrunk++; if (shrunk >= max_clean) break; @@ -380,54 +371,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { - int i; - struct neigh_hash_table *nht; + struct hlist_head *dev_head; + struct hlist_node *tmp; + struct neighbour *n; - nht = rcu_dereference_protected(tbl->nht, - lockdep_is_held(&tbl->lock)); + dev_head = neigh_get_dev_table(dev, tbl->family); - for (i = 0; i < (1 << nht->hash_shift); i++) { - struct neighbour *n; - struct neighbour __rcu **np = &nht->hash_buckets[i]; + hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) { + if (skip_perm && n->nud_state & NUD_PERMANENT) + continue; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock))) != NULL) { - if (dev && n->dev != dev) { - np = &n->next; - continue; - } - if (skip_perm && n->nud_state & NUD_PERMANENT) { - np = &n->next; - continue; - } - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); - write_lock(&n->lock); - neigh_del_timer(n); - neigh_mark_dead(n); - if (refcount_read(&n->refcnt) != 1) { - /* The most unpleasant situation. - We must destroy neighbour entry, - but someone still uses it. - - The destroy will be delayed until - the last user releases us, but - we must kill timers etc. and move - it to safe state. - */ - __skb_queue_purge(&n->arp_queue); - n->arp_queue_len_bytes = 0; - WRITE_ONCE(n->output, neigh_blackhole); - if (n->nud_state & NUD_VALID) - n->nud_state = NUD_NOARP; - else - n->nud_state = NUD_NONE; - neigh_dbg(2, "neigh %p is stray\n", n); - } - write_unlock(&n->lock); - neigh_cleanup_and_release(n); + hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); + write_lock(&n->lock); + neigh_del_timer(n); + neigh_mark_dead(n); + if (refcount_read(&n->refcnt) != 1) { + /* The most unpleasant situation. + * We must destroy neighbour entry, + * but someone still uses it. 
+ * + * The destroy will be delayed until + * the last user releases us, but + * we must kill timers etc. and move + * it to safe state. + */ + __skb_queue_purge(&n->arp_queue); + n->arp_queue_len_bytes = 0; + WRITE_ONCE(n->output, neigh_blackhole); + if (n->nud_state & NUD_VALID) + n->nud_state = NUD_NOARP; + else + n->nud_state = NUD_NONE; + neigh_dbg(2, "neigh %p is stray\n", n); } + write_unlock(&n->lock); + neigh_cleanup_and_release(n); } } @@ -530,27 +509,21 @@ static void neigh_get_hash_rnd(u32 *x) static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { - size_t size = (1 << shift) * sizeof(struct neighbour *); + size_t size = (1 << shift) * sizeof(struct hlist_head); + struct hlist_head *hash_heads; struct neigh_hash_table *ret; - struct neighbour __rcu **buckets; int i; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; - if (size <= PAGE_SIZE) { - buckets = kzalloc(size, GFP_ATOMIC); - } else { - buckets = (struct neighbour __rcu **) - __get_free_pages(GFP_ATOMIC | __GFP_ZERO, - get_order(size)); - kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); - } - if (!buckets) { + + hash_heads = kvzalloc(size, GFP_ATOMIC); + if (!hash_heads) { kfree(ret); return NULL; } - ret->hash_buckets = buckets; + ret->hash_heads = hash_heads; ret->hash_shift = shift; for (i = 0; i < NEIGH_NUM_HASH_RND; i++) neigh_get_hash_rnd(&ret->hash_rnd[i]); @@ -562,15 +535,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table *nht = container_of(head, struct neigh_hash_table, rcu); - size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); - struct neighbour __rcu **buckets = nht->hash_buckets; - if (size <= PAGE_SIZE) { - kfree(buckets); - } else { - kmemleak_free(buckets); - free_pages((unsigned long)buckets, get_order(size)); - } + kvfree(nht->hash_heads); kfree(nht); } @@ -589,24 +555,17 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, return old_nht; for (i = 0; i < (1 << old_nht->hash_shift); i++) { - struct neighbour *n, *next; + struct hlist_node *tmp; + struct neighbour *n; - for (n = rcu_dereference_protected(old_nht->hash_buckets[i], - lockdep_is_held(&tbl->lock)); - n != NULL; - n = next) { + neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) { hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); hash >>= (32 - new_nht->hash_shift); - next = rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock)); - rcu_assign_pointer(n->next, - rcu_dereference_protected( - new_nht->hash_buckets[hash], - lockdep_is_held(&tbl->lock))); - rcu_assign_pointer(new_nht->hash_buckets[hash], n); + hlist_del_rcu(&n->hash); + hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]); } } @@ -693,11 +652,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, goto out_tbl_unlock; } - for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], - lockdep_is_held(&tbl->lock)); - n1 != NULL; - n1 = rcu_dereference_protected(n1->next, - lockdep_is_held(&tbl->lock))) { + neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) { if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); @@ -713,10 +668,11 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, list_add_tail(&n->managed_list, &n->tbl->managed_list); if (want_ref) neigh_hold(n); - rcu_assign_pointer(n->next, - rcu_dereference_protected(nht->hash_buckets[hash_val], - lockdep_is_held(&tbl->lock))); - rcu_assign_pointer(nht->hash_buckets[hash_val], n); + hlist_add_head_rcu(&n->hash, 
&nht->hash_heads[hash_val]); + + hlist_add_head_rcu(&n->dev_list, + neigh_get_dev_table(dev, tbl->family)); + write_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); rc = n; @@ -948,10 +904,10 @@ static void neigh_connect(struct neighbour *neigh) static void neigh_periodic_work(struct work_struct *work) { struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); + struct neigh_hash_table *nht; + struct hlist_node *tmp; struct neighbour *n; - struct neighbour __rcu **np; unsigned int i; - struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); @@ -976,10 +932,7 @@ static void neigh_periodic_work(struct work_struct *work) goto out; for (i = 0 ; i < (1 << nht->hash_shift); i++) { - np = &nht->hash_buckets[i]; - - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock))) != NULL) { + neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { unsigned int state; write_lock(&n->lock); @@ -988,7 +941,7 @@ static void neigh_periodic_work(struct work_struct *work) if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || (n->flags & NTF_EXT_LEARNED)) { write_unlock(&n->lock); - goto next_elt; + continue; } if (time_before(n->used, n->confirmed) && @@ -999,18 +952,14 @@ static void neigh_periodic_work(struct work_struct *work) (state == NUD_FAILED || !time_in_range_open(jiffies, n->used, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); - -next_elt: - np = &n->next; } /* * It's fine to release lock here, even if hash table @@ -1957,7 +1906,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).portid, extack); write_lock_bh(&tbl->lock); neigh_release(neigh); - neigh_remove_one(neigh, tbl); + neigh_remove_one(neigh); write_unlock_bh(&tbl->lock); out: @@ -2728,9 +2677,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; - for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; - n != NULL; - n = rcu_dereference(n->next)) { + idx = 0; + neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || @@ -2876,6 +2824,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack); if (err < 0 && cb->strict_check) return err; + err = 0; s_t = cb->args[0]; @@ -3097,9 +3046,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; - for (n = rcu_dereference(nht->hash_buckets[chain]); - n != NULL; - n = rcu_dereference(n->next)) + neigh_for_each_in_bucket(n, &nht->hash_heads[chain]) cb(n, cookie); } read_unlock_bh(&tbl->lock); @@ -3111,29 +3058,25 @@ EXPORT_SYMBOL(neigh_for_each); void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)) { - int chain; struct neigh_hash_table *nht; + int chain; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (chain = 0; chain < (1 << nht->hash_shift); chain++) { + struct hlist_node *tmp; struct neighbour *n; - struct neighbour __rcu **np; - np = 
&nht->hash_buckets[chain]; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock))) != NULL) { + neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) { int release; write_lock(&n->lock); release = cb(n); if (release) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); neigh_mark_dead(n); - } else - np = &n->next; + } write_unlock(&n->lock); if (release) neigh_cleanup_and_release(n); @@ -3190,43 +3133,53 @@ EXPORT_SYMBOL(neigh_xmit); #ifdef CONFIG_PROC_FS -static struct neighbour *neigh_get_first(struct seq_file *seq) +static struct neighbour *neigh_get_valid(struct seq_file *seq, + struct neighbour *n, + loff_t *pos) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); + + if (!net_eq(dev_net(n->dev), net)) + return NULL; + + if (state->neigh_sub_iter) { + loff_t fakep = 0; + void *v; + + v = state->neigh_sub_iter(state, n, pos ? pos : &fakep); + if (!v) + return NULL; + if (pos) + return v; + } + + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + return n; + + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) + return n; + + return NULL; +} + +static struct neighbour *neigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; struct neigh_hash_table *nht = state->nht; - struct neighbour *n = NULL; - int bucket; + struct neighbour *n, *tmp; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; - for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { - n = rcu_dereference(nht->hash_buckets[bucket]); - while (n) { - if (!net_eq(dev_net(n->dev), net)) - goto next; - if (state->neigh_sub_iter) { - loff_t fakep = 0; - void *v; - - v = state->neigh_sub_iter(state, n, &fakep); - if (!v) - goto next; - } - if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) - break; - if (READ_ONCE(n->nud_state) & ~NUD_NOARP) - break; -next: - n = rcu_dereference(n->next); + while (++state->bucket < (1 << nht->hash_shift)) { + neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) { + tmp = neigh_get_valid(seq, n, NULL); + if (tmp) + return tmp; } - - if (n) - break; } - state->bucket = bucket; - return n; + return NULL; } static struct neighbour *neigh_get_next(struct seq_file *seq, @@ -3234,46 +3187,28 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net *net = seq_file_net(seq); - struct neigh_hash_table *nht = state->nht; + struct neighbour *tmp; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); + if (v) return n; } - n = rcu_dereference(n->next); - - while (1) { - while (n) { - if (!net_eq(dev_net(n->dev), net)) - goto next; - if (state->neigh_sub_iter) { - void *v = state->neigh_sub_iter(state, n, pos); - if (v) - return n; - goto next; - } - if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) - break; - if (READ_ONCE(n->nud_state) & ~NUD_NOARP) - break; -next: - n = rcu_dereference(n->next); + hlist_for_each_entry_continue(n, hash) { + tmp = neigh_get_valid(seq, n, pos); + if (tmp) { + n = tmp; + goto out; } - - if (n) - break; - - if (++state->bucket >= (1 << nht->hash_shift)) - break; - - n = rcu_dereference(nht->hash_buckets[state->bucket]); } + n = neigh_get_first(seq); +out: if (n && pos) --(*pos); + return n; } @@ -3376,7 +3311,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl struct neigh_seq_state *state = seq->private; state->tbl = tbl; - state->bucket = 0; + state->bucket = -1; 
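Note the new sentinel just above: neigh_seq_start() now primes state->bucket with -1 so that the pre-incrementing loop in the rewritten neigh_get_first() serves both the initial walk and every resume after a bucket is exhausted. A stand-alone plain-C illustration of that cursor convention; the names are hypothetical:

#include <stdio.h>

struct walk {
	int bucket;	/* primed to -1, as neigh_seq_start() does */
	int nbuckets;
};

/* return the next non-empty bucket, or -1 when the table is done;
 * the cursor is advanced before it is tested, so no bucket is
 * visited twice across repeated calls
 */
static int next_nonempty(struct walk *w, const int *count)
{
	while (++w->bucket < w->nbuckets)
		if (count[w->bucket])
			return w->bucket;
	return -1;
}

int main(void)
{
	int count[4] = {0, 2, 0, 1};
	struct walk w = {.bucket = -1, .nbuckets = 4};
	int b;

	while ((b = next_nonempty(&w, count)) >= 0)
		printf("bucket %d: %d entries\n", b, count[b]);
	return 0;
}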
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); rcu_read_lock(); @@ -3886,17 +3821,18 @@ EXPORT_SYMBOL(neigh_sysctl_unregister); #endif /* CONFIG_SYSCTL */ +static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNEIGH, .doit = neigh_add}, + {.msgtype = RTM_DELNEIGH, .doit = neigh_delete}, + {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info}, + {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set}, +}; + static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, - RTNL_FLAG_DUMP_UNLOCKED); - - rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, - 0); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); - + rtnl_register_many(neigh_rtnl_msg_handlers); return 0; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 05cf5347f25e..2d9afc6e2161 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -409,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec); static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) { - WRITE_ONCE(dev->gro_flush_timeout, val); + netdev_set_gro_flush_timeout(dev, val); return 0; } @@ -429,7 +429,7 @@ static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val if (val > S32_MAX) return -ERANGE; - WRITE_ONCE(dev->napi_defer_hard_irqs, val); + netdev_set_defer_hard_irqs(dev, (u32)val); return 0; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b231b27d8268..ae34ac818cda 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -56,7 +56,6 @@ static bool init_net_initialized; * outside. */ DECLARE_RWSEM(pernet_ops_rwsem); -EXPORT_SYMBOL_GPL(pernet_ops_rwsem); #define MIN_PERNET_OPS_ID \ ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) @@ -317,6 +316,7 @@ static __net_init void preinit_net_sysctl(struct net *net) */ net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; + net->core.sysctl_tstamp_allow_data = 1; } /* init code that must occur even if setup_net() is not called. 
*/ @@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); mutex_init(&net->ipv4.ra_mutex); + +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + mutex_init(&net->rtnl_mutex); + lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL); +#endif + preinit_net_sysctl(net); } @@ -1151,13 +1157,23 @@ static void __init netns_ipv4_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_early_demux); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, + sysctl_tcp_l3mdev_accept); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_reordering); CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, sysctl_tcp_rmem); - CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18); + CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 22); } #endif +static const struct rtnl_msg_handler net_ns_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNSID, .doit = rtnl_net_newid, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_GETNSID, .doit = rtnl_net_getid, + .dumpit = rtnl_net_dumpid, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + void __init net_ns_init(void) { struct net_generic *ng; @@ -1195,11 +1211,7 @@ void __init net_ns_init(void) if (register_pernet_subsys(&net_ns_ops)) panic("Could not register network namespace subsystems"); - rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, - RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); + rtnl_register_many(net_ns_rtnl_msg_handlers); } static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index b28424ae06d5..a89cbd8d87c3 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -14,12 +14,16 @@ /* Integer value ranges */ static const struct netlink_range_validation netdev_a_page_pool_id_range = { .min = 1ULL, - .max = 4294967295ULL, + .max = U32_MAX, }; static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = { .min = 1ULL, - .max = 2147483647ULL, + .max = S32_MAX, +}; + +static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range = { + .max = S32_MAX, }; /* Common nested types */ @@ -87,6 +91,14 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] [NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), }; +/* NETDEV_CMD_NAPI_SET - do */ +static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = { + [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, + [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), + [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, + [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -171,6 +183,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_DMABUF_FD, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_NAPI_SET, + .doit = netdev_nl_napi_set_doit, + .policy = netdev_napi_set_nl_policy, + .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, }; static const struct 
genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 8cda334fd042..e09dd7539ff2 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -33,6 +33,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 1cb954f2d39e..9527dd46e4dc 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -24,7 +24,7 @@ struct netdev_nl_dump_ctx { static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx); + NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx); return (struct netdev_nl_dump_ctx *)cb->ctx; } @@ -161,6 +161,9 @@ static int netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, const struct genl_info *info) { + unsigned long irq_suspend_timeout; + unsigned long gro_flush_timeout; + u32 napi_defer_hard_irqs; void *hdr; pid_t pid; @@ -189,6 +192,21 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, goto nla_put_failure; } + napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi); + if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS, + napi_defer_hard_irqs)) + goto nla_put_failure; + + irq_suspend_timeout = napi_get_irq_suspend_timeout(napi); + if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + irq_suspend_timeout)) + goto nla_put_failure; + + gro_flush_timeout = napi_get_gro_flush_timeout(napi); + if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + gro_flush_timeout)) + goto nla_put_failure; + genlmsg_end(rsp, hdr); return 0; @@ -215,6 +233,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; rtnl_lock(); + rcu_read_lock(); napi = napi_by_id(napi_id); if (napi) { @@ -224,6 +243,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) err = -ENOENT; } + rcu_read_unlock(); rtnl_unlock(); if (err) @@ -292,6 +312,59 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } static int +netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) +{ + u64 irq_suspend_timeout = 0; + u64 gro_flush_timeout = 0; + u32 defer = 0; + + if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { + defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); + napi_set_defer_hard_irqs(napi, defer); + } + + if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) { + irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]); + napi_set_irq_suspend_timeout(napi, irq_suspend_timeout); + } + + if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) { + gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]); + napi_set_gro_flush_timeout(napi, gro_flush_timeout); + } + + return 0; +} + +int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct napi_struct *napi; + unsigned int napi_id; + int err; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) + return -EINVAL; + + napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); + + rtnl_lock(); + rcu_read_lock(); + + napi = napi_by_id(napi_id); + if (napi) { + err = netdev_nl_napi_set_config(napi, info); + } else { + NL_SET_BAD_ATTR(info->extack, 
info->attrs[NETDEV_A_NAPI_ID]); + err = -ENOENT; + } + + rcu_read_unlock(); + rtnl_unlock(); + + return err; +} + +static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index aa49b92e9194..2e459b9d88eb 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -45,9 +45,6 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 - -static struct sk_buff_head skb_pool; - #define USEC_PER_POLL 50 #define MAX_SKB_SIZE \ @@ -234,20 +231,23 @@ void netpoll_poll_enable(struct net_device *dev) up(&ni->dev_lock); } -static void refill_skbs(void) +static void refill_skbs(struct netpoll *np) { + struct sk_buff_head *skb_pool; struct sk_buff *skb; unsigned long flags; - spin_lock_irqsave(&skb_pool.lock, flags); - while (skb_pool.qlen < MAX_SKBS) { + skb_pool = &np->skb_pool; + + spin_lock_irqsave(&skb_pool->lock, flags); + while (skb_pool->qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(&skb_pool, skb); + __skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool.lock, flags); + spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) @@ -284,12 +284,12 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) struct sk_buff *skb; zap_completion_queue(); - refill_skbs(); + refill_skbs(np); repeat: skb = alloc_skb(len, GFP_ATOMIC); if (!skb) - skb = skb_dequeue(&skb_pool); + skb = skb_dequeue(&np->skb_pool); if (!skb) { if (++count < 10) { @@ -531,6 +531,14 @@ static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) return -1; } +static void skb_pool_flush(struct netpoll *np) +{ + struct sk_buff_head *skb_pool; + + skb_pool = &np->skb_pool; + skb_queue_purge_reason(skb_pool, SKB_CONSUMED); +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; @@ -626,7 +634,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) goto out; } - if (!ndev->npinfo) { + if (!rcu_access_pointer(ndev->npinfo)) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; @@ -641,7 +649,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) ops = ndev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo); + err = ops->ndo_netpoll_setup(ndev); if (err) goto free_npinfo; } @@ -673,6 +681,8 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; + skb_queue_head_init(&np->skb_pool); + rtnl_lock(); if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; @@ -773,14 +783,16 @@ put_noaddr: } /* fill up the skb queue */ - refill_skbs(); + refill_skbs(np); err = __netpoll_setup(np, ndev); if (err) - goto put; + goto flush; rtnl_unlock(); return 0; +flush: + skb_pool_flush(np); put: DEBUG_NET_WARN_ON_ONCE(np->dev); if (ip_overwritten) @@ -792,13 +804,6 @@ unlock: } EXPORT_SYMBOL(netpoll_setup); -static int __init netpoll_init(void) -{ - skb_queue_head_init(&skb_pool); - return 0; -} -core_initcall(netpoll_init); - static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) { struct netpoll_info *npinfo = @@ -835,6 +840,8 @@ void __netpoll_cleanup(struct netpoll *np) call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, NULL); + + skb_pool_flush(np); } EXPORT_SYMBOL_GPL(__netpoll_cleanup); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a813d30d2135..f89cf93f6eb4 100644 --- 
a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -950,6 +950,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, if (netmem && *offset + size > max_size) { netmem = page_pool_drain_frag(pool, netmem); if (netmem) { + recycle_stat_inc(pool, cached); alloc_stat_inc(pool, fast); goto frag_reset; } @@ -974,7 +975,6 @@ frag_reset: pool->frag_users++; pool->frag_offset = *offset + size; - alloc_stat_inc(pool, fast); return netmem; } EXPORT_SYMBOL(page_pool_alloc_frag_netmem); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2ba5cd965d3f..dd142f444659 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -179,6 +179,166 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_lock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_lock); + +void __rtnl_net_unlock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_unlock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_unlock); + +void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); + __rtnl_net_lock(net); +} +EXPORT_SYMBOL(rtnl_net_lock); + +void rtnl_net_unlock(struct net *net) +{ + __rtnl_net_unlock(net); + rtnl_unlock(); +} +EXPORT_SYMBOL(rtnl_net_unlock); + +int rtnl_net_trylock(struct net *net) +{ + int ret = rtnl_trylock(); + + if (ret) + __rtnl_net_lock(net); + + return ret; +} +EXPORT_SYMBOL(rtnl_net_trylock); + +static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b) +{ + if (net_eq(net_a, net_b)) + return 0; + + /* always init_net first */ + if (net_eq(net_a, &init_net)) + return -1; + + if (net_eq(net_b, &init_net)) + return 1; + + /* otherwise lock in ascending order */ + return net_a < net_b ? -1 : 1; +} + +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) +{ + const struct net *net_a, *net_b; + + net_a = container_of(a, struct net, rtnl_mutex.dep_map); + net_b = container_of(b, struct net, rtnl_mutex.dep_map); + + return rtnl_net_cmp_locks(net_a, net_b); +} + +bool rtnl_net_is_locked(struct net *net) +{ + return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex); +} +EXPORT_SYMBOL(rtnl_net_is_locked); + +bool lockdep_rtnl_net_is_held(struct net *net) +{ + return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex); +} +EXPORT_SYMBOL(lockdep_rtnl_net_is_held); +#else +static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b) +{ + /* No need to swap */ + return -1; +} +#endif + +struct rtnl_nets { + /* ->newlink() needs to freeze 3 netns at most; + * 2 for the new device, 1 for its peer. + */ + struct net *net[3]; + unsigned char len; +}; + +static void rtnl_nets_init(struct rtnl_nets *rtnl_nets) +{ + memset(rtnl_nets, 0, sizeof(*rtnl_nets)); +} + +static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets) +{ + int i; + + for (i = 0; i < rtnl_nets->len; i++) { + put_net(rtnl_nets->net[i]); + rtnl_nets->net[i] = NULL; + } + + rtnl_nets->len = 0; +} + +/** + * rtnl_nets_add - Add netns to be locked before ->newlink(). + * + * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net(). + * @net: netns pointer with an extra refcnt held. + * + * The extra refcnt is released in rtnl_nets_destroy(). 
+ */ +static void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net) +{ + int i; + + DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net)); + + for (i = 0; i < rtnl_nets->len; i++) { + switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) { + case 0: + put_net(net); + return; + case 1: + swap(rtnl_nets->net[i], net); + } + } + + rtnl_nets->net[i] = net; + rtnl_nets->len++; +} + +static void rtnl_nets_lock(struct rtnl_nets *rtnl_nets) +{ + int i; + + rtnl_lock(); + + for (i = 0; i < rtnl_nets->len; i++) + __rtnl_net_lock(rtnl_nets->net[i]); +} + +static void rtnl_nets_unlock(struct rtnl_nets *rtnl_nets) +{ + int i; + + for (i = 0; i < rtnl_nets->len; i++) + __rtnl_net_unlock(rtnl_nets->net[i]); + + rtnl_unlock(); +} + static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) @@ -269,64 +429,13 @@ unlock: } /** - * rtnl_register_module - Register a rtnetlink message type - * - * @owner: module registering the hook (THIS_MODULE) - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions - * - * Like rtnl_register, but for use by removable modules. - */ -int rtnl_register_module(struct module *owner, - int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - return rtnl_register_internal(owner, protocol, msgtype, - doit, dumpit, flags); -} -EXPORT_SYMBOL_GPL(rtnl_register_module); - -/** - * rtnl_register - Register a rtnetlink message type - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions - * - * Registers the specified function pointers (at least one of them has - * to be non-NULL) to be called whenever a request message for the - * specified protocol family and message type is received. - * - * The special protocol family PF_UNSPEC may be used to define fallback - * function pointers for the case when no entry for the specific protocol - * family exists. - */ -void rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - int err; - - err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit, - flags); - if (err) - pr_err("Unable to register rtnetlink message handler, " - "protocol = %d, message type = %d\n", protocol, msgtype); -} - -/** * rtnl_unregister - Unregister a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * * Returns 0 on success or a negative error code. 
*/ -int rtnl_unregister(int protocol, int msgtype) +static int rtnl_unregister(int protocol, int msgtype) { struct rtnl_link __rcu **tab; struct rtnl_link *link; @@ -349,7 +458,6 @@ int rtnl_unregister(int protocol, int msgtype) return 0; } -EXPORT_SYMBOL_GPL(rtnl_unregister); /** * rtnl_unregister_all - Unregister all rtnetlink message types of a protocol @@ -384,6 +492,26 @@ void rtnl_unregister_all(int protocol) } EXPORT_SYMBOL_GPL(rtnl_unregister_all); +/** + * __rtnl_register_many - Register rtnetlink message types + * @handlers: Array of struct rtnl_msg_handler + * @n: The length of @handlers + * + * Registers the specified function pointers (at least one of them has + * to be non-NULL) to be called whenever a request message for the + * specified protocol family and message type is received. + * + * The special protocol family PF_UNSPEC may be used to define fallback + * function pointers for the case when no entry for the specific protocol + * family exists. + * + * When one element of @handlers fails to register, + * 1) built-in: panics. + * 2) modules : the previous successful registrations are unwound + * and an error is returned. + * + * Use rtnl_register_many(). + */ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) { const struct rtnl_msg_handler *handler; @@ -394,6 +522,10 @@ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) handler->msgtype, handler->doit, handler->dumpit, handler->flags); if (err) { + if (!handler->owner) + panic("Unable to register rtnetlink message " + "handlers, %pS\n", handlers); + + __rtnl_unregister_many(handlers, i); break; } @@ -413,46 +545,33 @@ void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n) } EXPORT_SYMBOL_GPL(__rtnl_unregister_many); +static DEFINE_MUTEX(link_ops_mutex); static LIST_HEAD(link_ops); -static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index) { - const struct rtnl_link_ops *ops; + struct rtnl_link_ops *ops; + + rcu_read_lock(); - list_for_each_entry(ops, &link_ops, list) { - if (!strcmp(ops->kind, kind)) - return ops; + list_for_each_entry_rcu(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } } - return NULL; -} -/** - * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. - * @ops: struct rtnl_link_ops * to register - * - * The caller must hold the rtnl_mutex. This function should be used - * by drivers that create devices during module initialization. It - * must be called before registering the devices. - * - * Returns 0 on success or a negative error code. - */ -int __rtnl_link_register(struct rtnl_link_ops *ops) -{ - if (rtnl_link_ops_get(ops->kind)) - return -EEXIST; + ops = NULL; +unlock: + rcu_read_unlock(); - /* The check for alloc/setup is here because if ops - * does not have that filled up, it is not possible - * to use the ops for creating device. So do not - * fill up dellink as well. That disables rtnl_dellink. - */ - if ((ops->alloc || ops->setup) && !ops->dellink) - ops->dellink = unregister_netdevice_queue; + return ops; +} - list_add_tail(&ops->list, &link_ops); - return 0; +static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } -EXPORT_SYMBOL_GPL(__rtnl_link_register); /** * rtnl_link_register - Register rtnl_link_ops with rtnetlink. 
@@ -462,6 +581,7 @@ EXPORT_SYMBOL_GPL(__rtnl_link_register); */ int rtnl_link_register(struct rtnl_link_ops *ops) { + struct rtnl_link_ops *tmp; int err; /* Sanity-check max sizes to avoid stack buffer overflow. */ @@ -469,9 +589,31 @@ int rtnl_link_register(struct rtnl_link_ops *ops) ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)) return -EINVAL; - rtnl_lock(); - err = __rtnl_link_register(ops); - rtnl_unlock(); + /* The check for alloc/setup is here because if ops + * does not have that filled up, it is not possible + * to use the ops for creating device. So do not + * fill up dellink as well. That disables rtnl_dellink. + */ + if ((ops->alloc || ops->setup) && !ops->dellink) + ops->dellink = unregister_netdevice_queue; + + err = init_srcu_struct(&ops->srcu); + if (err) + return err; + + mutex_lock(&link_ops_mutex); + + list_for_each_entry(tmp, &link_ops, list) { + if (!strcmp(ops->kind, tmp->kind)) { + err = -EEXIST; + goto unlock; + } + } + + list_add_tail_rcu(&ops->list, &link_ops); +unlock: + mutex_unlock(&link_ops_mutex); + return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); @@ -488,25 +630,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) unregister_netdevice_many(&list_kill); } -/** - * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. - * @ops: struct rtnl_link_ops * to unregister - * - * The caller must hold the rtnl_mutex and guarantee net_namespace_list - * integrity (hold pernet_ops_rwsem for writing to close the race - * with setup_net() and cleanup_net()). - */ -void __rtnl_link_unregister(struct rtnl_link_ops *ops) -{ - struct net *net; - - for_each_net(net) { - __rtnl_kill_links(net, ops); - } - list_del(&ops->list); -} -EXPORT_SYMBOL_GPL(__rtnl_link_unregister); - /* Return with the rtnl_lock held when there are no network * devices unregistering in any network namespace. */ @@ -535,10 +658,22 @@ static void rtnl_lock_unregistering_all(void) */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { + struct net *net; + + mutex_lock(&link_ops_mutex); + list_del_rcu(&ops->list); + mutex_unlock(&link_ops_mutex); + + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); + /* Close the race with setup_net() and cleanup_net() */ down_write(&pernet_ops_rwsem); rtnl_lock_unregistering_all(); - __rtnl_link_unregister(ops); + + for_each_net(net) + __rtnl_kill_links(net, ops); + rtnl_unlock(); up_write(&pernet_ops_rwsem); } @@ -595,31 +730,51 @@ static size_t rtnl_link_get_size(const struct net_device *dev) static LIST_HEAD(rtnl_af_ops); -static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index) { - const struct rtnl_af_ops *ops; + struct rtnl_af_ops *ops; ASSERT_RTNL(); - list_for_each_entry(ops, &rtnl_af_ops, list) { - if (ops->family == family) - return ops; + rcu_read_lock(); + + list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { + if (ops->family == family) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } } - return NULL; + ops = NULL; +unlock: + rcu_read_unlock(); + + return ops; +} + +static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } /** * rtnl_af_register - Register rtnl_af_ops with rtnetlink. * @ops: struct rtnl_af_ops * to register * - * Returns 0 on success or a negative error code. + * Return: 0 on success or a negative error code. 
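+ * + * Minimal registration sketch (illustrative only; the ops and callback names are hypothetical, not from this patch). The return value now needs checking because init_srcu_struct() can fail: + * + * static struct rtnl_af_ops example_af_ops = { + * .family = AF_INET, + * .fill_link_af = example_fill_link_af, + * }; + * + * err = rtnl_af_register(&example_af_ops); + * if (err) + * return err;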
*/ -void rtnl_af_register(struct rtnl_af_ops *ops) +int rtnl_af_register(struct rtnl_af_ops *ops) { + int err = init_srcu_struct(&ops->srcu); + + if (err) + return err; + rtnl_lock(); list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); + + return 0; } EXPORT_SYMBOL_GPL(rtnl_af_register); @@ -634,6 +789,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops) rtnl_unlock(); synchronize_rcu(); + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); @@ -1147,6 +1304,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + rtnl_devlink_port_size(dev) + rtnl_dpll_pin_size(dev) + + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */ + 0; } @@ -1896,6 +2054,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, READ_ONCE(dev->tso_max_size)) || nla_put_u32(skb, IFLA_TSO_MAX_SEGS, READ_ONCE(dev->tso_max_segs)) || + nla_put_uint(skb, IFLA_MAX_PACING_OFFLOAD_HORIZON, + READ_ONCE(dev->max_pacing_offload_horizon)) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, READ_ONCE(dev->num_rx_queues)) || @@ -2004,6 +2164,7 @@ nla_put_failure: } static const struct nla_policy ifla_policy[IFLA_MAX+1] = { + [IFLA_UNSPEC] = { .strict_start_type = IFLA_DPLL_PIN }, [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -2112,10 +2273,11 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, }; -static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) +static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla, + int *ops_srcu_index) { - const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; + struct rtnl_link_ops *ops = NULL; if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0) return NULL; @@ -2124,7 +2286,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla char kind[MODULE_NAME_LEN]; nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); + ops = rtnl_link_ops_get(kind, ops_srcu_index); } return ops; @@ -2244,8 +2406,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - const struct rtnl_link_ops *kind_ops = NULL; struct netlink_ext_ack *extack = cb->extack; + struct rtnl_link_ops *kind_ops = NULL; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; @@ -2256,6 +2418,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net *tgt_net = net; u32 ext_filter_mask = 0; struct net_device *dev; + int ops_srcu_index; int master_idx = 0; int netnsid = -1; int err, i; @@ -2289,7 +2452,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) master_idx = nla_get_u32(tb[i]); break; case IFLA_LINKINFO: - kind_ops = linkinfo_to_kind_ops(tb[i]); + kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index); break; default: if (cb->strict_check) { @@ -2315,6 +2478,10 @@ walk_entries: if (err < 0) break; } + + if (kind_ops) + rtnl_link_ops_put(kind_ops, ops_srcu_index); + cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); if (netnsid >= 0) @@ -2345,9 +2512,10 @@ int rtnl_nla_parse_ifinfomsg(struct 
nlattr **tb, const struct nlattr *nla_peer, } EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg); -struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) +static struct net *rtnl_link_get_net_ifla(struct nlattr *tb[]) { - struct net *net; + struct net *net = NULL; + /* Examine the link attributes and figure out which * network namespace we are talking about. */ @@ -2355,8 +2523,17 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); else if (tb[IFLA_NET_NS_FD]) net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); - else + + return net; +} + +struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) +{ + struct net *net = rtnl_link_get_net_ifla(tb); + + if (!net) net = get_net(src_net); + return net; } EXPORT_SYMBOL(rtnl_link_get_net); @@ -2496,20 +2673,24 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], int rem, err; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - af_ops = rtnl_af_lookup(nla_type(af)); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); if (!af_ops) return -EAFNOSUPPORT; if (!af_ops->set_link_af) - return -EOPNOTSUPP; - - if (af_ops->validate_link_af) { + err = -EOPNOTSUPP; + else if (af_ops->validate_link_af) err = af_ops->validate_link_af(dev, af, extack); - if (err < 0) - return err; - } + else + err = 0; + + rtnl_af_put(af_ops, af_ops_srcu_index); + + if (err < 0) + return err; } } @@ -2800,8 +2981,8 @@ static int do_set_proto_down(struct net_device *dev, #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. */ #define DO_SETLINK_NOTIFY 0x03 -static int do_setlink(const struct sk_buff *skb, - struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, struct net_device *dev, + struct net *tgt_net, struct ifinfomsg *ifm, struct netlink_ext_ack *extack, struct nlattr **tb, int status) { @@ -2809,32 +2990,25 @@ static int do_setlink(const struct sk_buff *skb, char ifname[IFNAMSIZ]; int err; + err = validate_linkmsg(dev, tb, extack); + if (err < 0) + goto errout; + if (tb[IFLA_IFNAME]) nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else ifname[0] = '\0'; - if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { + if (!net_eq(tgt_net, dev_net(dev))) { const char *pat = ifname[0] ? 
ifname : NULL; - struct net *net; int new_ifindex; - net = rtnl_link_get_net_capable(skb, dev_net(dev), - tb, CAP_NET_ADMIN); - if (IS_ERR(net)) { - err = PTR_ERR(net); - goto errout; - } - - if (tb[IFLA_NEW_IFINDEX]) - new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]); - else - new_ifindex = 0; + new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); - err = __dev_change_net_namespace(dev, net, pat, new_ifindex); - put_net(net); + err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex); if (err) goto errout; + status |= DO_SETLINK_MODIFIED; } @@ -3093,11 +3267,18 @@ static int do_setlink(const struct sk_buff *skb, int rem; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); + if (!af_ops) { + err = -EAFNOSUPPORT; + goto errout; + } err = af_ops->set_link_af(dev, af, extack); + rtnl_af_put(af_ops, af_ops_srcu_index); + if (err < 0) goto errout; @@ -3194,11 +3375,13 @@ static struct net_device *rtnl_dev_get(struct net *net, static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct ifinfomsg *ifm = nlmsg_data(nlh); struct net *net = sock_net(skb->sk); - struct ifinfomsg *ifm; - struct net_device *dev; - int err; struct nlattr *tb[IFLA_MAX+1]; + struct net_device *dev = NULL; + struct rtnl_nets rtnl_nets; + struct net *tgt_net; + int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); @@ -3209,25 +3392,31 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; - err = -EINVAL; - ifm = nlmsg_data(nlh); + tgt_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); + if (IS_ERR(tgt_net)) { + err = PTR_ERR(tgt_net); + goto errout; + } + + rtnl_nets_init(&rtnl_nets); + rtnl_nets_add(&rtnl_nets, get_net(net)); + rtnl_nets_add(&rtnl_nets, tgt_net); + + rtnl_nets_lock(&rtnl_nets); + if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(net, tb); else - goto errout; + err = -EINVAL; - if (dev == NULL) { + if (dev) + err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0); + else if (!err) err = -ENODEV; - goto errout; - } - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - goto errout; - - err = do_setlink(skb, dev, ifm, extack, tb, 0); + rtnl_nets_unlock(&rtnl_nets); errout: return err; } @@ -3287,14 +3476,14 @@ EXPORT_SYMBOL_GPL(rtnl_delete_link); static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct ifinfomsg *ifm = nlmsg_data(nlh); struct net *net = sock_net(skb->sk); u32 portid = NETLINK_CB(skb).portid; - struct net *tgt_net = net; - struct net_device *dev = NULL; - struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; - int err; + struct net_device *dev = NULL; + struct net *tgt_net = net; int netnsid = -1; + int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); @@ -3312,27 +3501,24 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(tgt_net); } - err = -EINVAL; - ifm = nlmsg_data(nlh); + rtnl_net_lock(tgt_net); + if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(tgt_net, tb); + + if (dev) + err = rtnl_delete_link(dev, portid, nlh); + else if 
(ifm->ifi_index > 0 || tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + err = -ENODEV; else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else - goto out; - - if (!dev) { - if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0) - err = -ENODEV; - - goto out; - } + err = -EINVAL; - err = rtnl_delete_link(dev, portid, nlh); + rtnl_net_unlock(tgt_net); -out: if (netnsid >= 0) put_net(tgt_net); @@ -3459,21 +3645,90 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, } EXPORT_SYMBOL(rtnl_create_link); +struct rtnl_newlink_tbs { + struct nlattr *tb[IFLA_MAX + 1]; + struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; + struct nlattr *attr[RTNL_MAX_TYPE + 1]; + struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; +}; + +static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh, + const struct rtnl_link_ops *ops, + struct net_device *dev, struct net *tgt_net, + struct rtnl_newlink_tbs *tbs, + struct nlattr **data, + struct netlink_ext_ack *extack) +{ + struct nlattr ** const linkinfo = tbs->linkinfo; + struct nlattr ** const tb = tbs->tb; + int status = 0; + int err; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || !ops->changelink) + return -EOPNOTSUPP; + + err = ops->changelink(dev, tb, data, extack); + if (err < 0) + return err; + + status |= DO_SETLINK_NOTIFY; + } + + if (linkinfo[IFLA_INFO_SLAVE_DATA]) { + const struct rtnl_link_ops *m_ops = NULL; + struct nlattr **slave_data = NULL; + struct net_device *master_dev; + + master_dev = netdev_master_upper_dev_get(dev); + if (master_dev) + m_ops = master_dev->rtnl_link_ops; + + if (!m_ops || !m_ops->slave_changelink) + return -EOPNOTSUPP; + + if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) + return -EINVAL; + + if (m_ops->slave_maxtype) { + err = nla_parse_nested_deprecated(tbs->slave_attr, + m_ops->slave_maxtype, + linkinfo[IFLA_INFO_SLAVE_DATA], + m_ops->slave_policy, extack); + if (err < 0) + return err; + + slave_data = tbs->slave_attr; + } + + err = m_ops->slave_changelink(master_dev, dev, tb, slave_data, extack); + if (err < 0) + return err; + + status |= DO_SETLINK_NOTIFY; + } + + return do_setlink(skb, dev, tgt_net, nlmsg_data(nlh), extack, tb, status); +} + static int rtnl_group_changelink(const struct sk_buff *skb, - struct net *net, int group, - struct ifinfomsg *ifm, - struct netlink_ext_ack *extack, - struct nlattr **tb) + struct net *net, struct net *tgt_net, + int group, struct ifinfomsg *ifm, + struct netlink_ext_ack *extack, + struct nlattr **tb) { struct net_device *dev, *aux; int err; for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - return err; - err = do_setlink(skb, dev, ifm, extack, tb, 0); + err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0); if (err < 0) return err; } @@ -3484,6 +3739,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, const struct rtnl_link_ops *ops, + struct net *tgt_net, struct net *link_net, const struct nlmsghdr *nlh, struct nlattr **tb, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3491,7 +3747,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, unsigned char name_assign_type = NET_NAME_USER; struct net *net = sock_net(skb->sk); u32 portid = NETLINK_CB(skb).portid; - struct 
net *dest_net, *link_net; struct net_device *dev; char ifname[IFNAMSIZ]; int err; @@ -3506,27 +3761,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, name_assign_type = NET_NAME_ENUM; } - dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - - if (tb[IFLA_LINK_NETNSID]) { - int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); - - link_net = get_net_ns_by_id(dest_net, id); - if (!link_net) { - NL_SET_ERR_MSG(extack, "Unknown network namespace id"); - err = -EINVAL; - goto out; - } - err = -EPERM; - if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) - goto out; - } else { - link_net = NULL; - } - - dev = rtnl_create_link(link_net ? : dest_net, ifname, + dev = rtnl_create_link(link_net ? : tgt_net, ifname, name_assign_type, ops, tb, extack); if (IS_ERR(dev)) { err = PTR_ERR(dev); @@ -3548,7 +3783,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, if (err < 0) goto out_unregister; if (link_net) { - err = dev_change_net_namespace(dev, dest_net, ifname); + err = dev_change_net_namespace(dev, tgt_net, ifname); if (err < 0) goto out_unregister; } @@ -3558,9 +3793,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out_unregister; } out: - if (link_net) - put_net(link_net); - put_net(dest_net); return err; out_unregister: if (ops->newlink) { @@ -3574,41 +3806,49 @@ out_unregister: goto out; } -struct rtnl_newlink_tbs { +static int rtnl_add_peer_net(struct rtnl_nets *rtnl_nets, + const struct rtnl_link_ops *ops, + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ struct nlattr *tb[IFLA_MAX + 1]; - struct nlattr *attr[RTNL_MAX_TYPE + 1]; - struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; -}; + struct net *net; + int err; + + if (!data || !data[ops->peer_type]) + return 0; + + err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack); + if (err < 0) + return err; + + if (ops->validate) { + err = ops->validate(tb, NULL, extack); + if (err < 0) + return err; + } + + net = rtnl_link_get_net_ifla(tb); + if (IS_ERR(net)) + return PTR_ERR(net); + if (net) + rtnl_nets_add(rtnl_nets, net); + + return 0; +} static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + const struct rtnl_link_ops *ops, + struct net *tgt_net, struct net *link_net, struct rtnl_newlink_tbs *tbs, + struct nlattr **data, struct netlink_ext_ack *extack) { - struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; struct nlattr ** const tb = tbs->tb; - const struct rtnl_link_ops *m_ops; - struct net_device *master_dev; struct net *net = sock_net(skb->sk); - const struct rtnl_link_ops *ops; - struct nlattr **slave_data; - char kind[MODULE_NAME_LEN]; struct net_device *dev; struct ifinfomsg *ifm; - struct nlattr **data; bool link_specified; - int err; - -#ifdef CONFIG_MODULES -replay: -#endif - err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, - ifla_policy, extack); - if (err < 0) - return err; - - err = rtnl_ensure_unique_netns(tb, extack, false); - if (err < 0) - return err; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { @@ -3625,151 +3865,148 @@ replay: dev = NULL; } - master_dev = NULL; - m_ops = NULL; - if (dev) { - master_dev = netdev_master_upper_dev_get(dev); - if (master_dev) - m_ops = master_dev->rtnl_link_ops; + if (dev) + return rtnl_changelink(skb, nlh, ops, dev, tgt_net, tbs, data, extack); + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + /* No dev found and NLM_F_CREATE not set. 
Requested dev does not exist, + * or it's for a group + */ + if (link_specified || !tb[IFLA_GROUP]) + return -ENODEV; + + return rtnl_group_changelink(skb, net, tgt_net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, extack, tb); } + if (tb[IFLA_MAP] || tb[IFLA_PROTINFO]) + return -EOPNOTSUPP; + + if (!ops) { + NL_SET_ERR_MSG(extack, "Unknown device type"); + return -EOPNOTSUPP; + } + + return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, nlh, tb, data, extack); +} + +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr **tb, **linkinfo, **data = NULL; + struct net *tgt_net, *link_net = NULL; + struct rtnl_link_ops *ops = NULL; + struct rtnl_newlink_tbs *tbs; + struct rtnl_nets rtnl_nets; + int ops_srcu_index; + int ret; + + tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); + if (!tbs) + return -ENOMEM; + + tb = tbs->tb; + ret = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, extack); + if (ret < 0) + goto free; + + ret = rtnl_ensure_unique_netns(tb, extack, false); + if (ret < 0) + goto free; + + linkinfo = tbs->linkinfo; if (tb[IFLA_LINKINFO]) { - err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, + ret = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO], ifla_info_policy, NULL); - if (err < 0) - return err; - } else - memset(linkinfo, 0, sizeof(linkinfo)); + if (ret < 0) + goto free; + } else { + memset(linkinfo, 0, sizeof(tbs->linkinfo)); + } if (linkinfo[IFLA_INFO_KIND]) { + char kind[MODULE_NAME_LEN]; + nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); - } else { - kind[0] = '\0'; - ops = NULL; + ops = rtnl_link_ops_get(kind, &ops_srcu_index); +#ifdef CONFIG_MODULES + if (!ops) { + request_module("rtnl-link-%s", kind); + ops = rtnl_link_ops_get(kind, &ops_srcu_index); + } +#endif } - data = NULL; + rtnl_nets_init(&rtnl_nets); + if (ops) { - if (ops->maxtype > RTNL_MAX_TYPE) - return -EINVAL; + if (ops->maxtype > RTNL_MAX_TYPE) { + ret = -EINVAL; + goto put_ops; + } if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, + ret = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, linkinfo[IFLA_INFO_DATA], ops->policy, extack); - if (err < 0) - return err; + if (ret < 0) + goto put_ops; + data = tbs->attr; } + if (ops->validate) { - err = ops->validate(tb, data, extack); - if (err < 0) - return err; + ret = ops->validate(tb, data, extack); + if (ret < 0) + goto put_ops; } - } - - slave_data = NULL; - if (m_ops) { - if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) - return -EINVAL; - if (m_ops->slave_maxtype && - linkinfo[IFLA_INFO_SLAVE_DATA]) { - err = nla_parse_nested_deprecated(tbs->slave_attr, - m_ops->slave_maxtype, - linkinfo[IFLA_INFO_SLAVE_DATA], - m_ops->slave_policy, - extack); - if (err < 0) - return err; - slave_data = tbs->slave_attr; + if (ops->peer_type) { + ret = rtnl_add_peer_net(&rtnl_nets, ops, data, extack); + if (ret < 0) + goto put_ops; } } - if (dev) { - int status = 0; + tgt_net = rtnl_link_get_net_capable(skb, sock_net(skb->sk), tb, CAP_NET_ADMIN); + if (IS_ERR(tgt_net)) { + ret = PTR_ERR(tgt_net); + goto put_net; + } - if (nlh->nlmsg_flags & NLM_F_EXCL) - return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_REPLACE) - return -EOPNOTSUPP; + rtnl_nets_add(&rtnl_nets, tgt_net); - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - return err; - - if (linkinfo[IFLA_INFO_DATA]) { - if (!ops || ops != dev->rtnl_link_ops || - !ops->changelink) - return 
-EOPNOTSUPP; + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); - err = ops->changelink(dev, tb, data, extack); - if (err < 0) - return err; - status |= DO_SETLINK_NOTIFY; + link_net = get_net_ns_by_id(tgt_net, id); + if (!link_net) { + NL_SET_ERR_MSG(extack, "Unknown network namespace id"); + ret = -EINVAL; + goto put_net; } - if (linkinfo[IFLA_INFO_SLAVE_DATA]) { - if (!m_ops || !m_ops->slave_changelink) - return -EOPNOTSUPP; + rtnl_nets_add(&rtnl_nets, link_net); - err = m_ops->slave_changelink(master_dev, dev, tb, - slave_data, extack); - if (err < 0) - return err; - status |= DO_SETLINK_NOTIFY; + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + goto put_net; } - - return do_setlink(skb, dev, ifm, extack, tb, status); - } - - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - /* No dev found and NLM_F_CREATE not set. Requested dev does not exist, - * or it's for a group - */ - if (link_specified) - return -ENODEV; - if (tb[IFLA_GROUP]) - return rtnl_group_changelink(skb, net, - nla_get_u32(tb[IFLA_GROUP]), - ifm, extack, tb); - return -ENODEV; + rtnl_nets_lock(&rtnl_nets); + ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack); + rtnl_nets_unlock(&rtnl_nets); - if (tb[IFLA_MAP] || tb[IFLA_PROTINFO]) - return -EOPNOTSUPP; + if (!ops) { -#ifdef CONFIG_MODULES - if (kind[0]) { - __rtnl_unlock(); - request_module("rtnl-link-%s", kind); - rtnl_lock(); - ops = rtnl_link_ops_get(kind); - if (ops) - goto replay; - } -#endif - NL_SET_ERR_MSG(extack, "Unknown device type"); - return -EOPNOTSUPP; - } - - return rtnl_newlink_create(skb, ifm, ops, nlh, tb, data, extack); -} - -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct rtnl_newlink_tbs *tbs; - int ret; - - tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); - if (!tbs) - return -ENOMEM; - - ret = __rtnl_newlink(skb, nlh, tbs, extack); +put_net: + rtnl_nets_destroy(&rtnl_nets); +put_ops: + if (ops) + rtnl_link_ops_put(ops, ops_srcu_index); +free: kfree(tbs); return ret; } @@ -4341,9 +4578,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; + bool notified = false; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags, extack); + nlh->nlmsg_flags, &notified, extack); if (err) goto out; else @@ -4352,16 +4590,18 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF)) { + bool notified = false; + if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags, - extack); + &notified, extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); - if (!err) { + if (!err && !notified) { rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; @@ -4461,11 +4701,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); + bool notified = false; ops = br_dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, + &notified, extack); } else { if (ops->ndo_fdb_del_bulk) err = 
ops->ndo_fdb_del_bulk(nlh, dev, extack); @@ -4479,10 +4721,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if (ndm->ndm_flags & NTF_SELF) { + bool notified = false; + ops = dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, + &notified, extack); else err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); } else { @@ -4493,7 +4738,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!err) { - if (!del_bulk) + if (!del_bulk && !notified) rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; @@ -6198,7 +6443,7 @@ static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx, s_idx; int err; - NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx); + NL_ASSERT_CTX_FITS(struct rtnl_mdb_dump_ctx); if (cb->strict_check) { err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); @@ -6765,6 +7010,41 @@ static struct pernet_operations rtnetlink_net_ops = { .exit = rtnetlink_net_exit, }; +static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.msgtype = RTM_DELLINK, .doit = rtnl_dellink, + .flags = RTNL_FLAG_DOIT_PERNET_WIP}, + {.msgtype = RTM_GETLINK, .doit = rtnl_getlink, + .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, + {.msgtype = RTM_SETLINK, .doit = rtnl_setlink, + .flags = RTNL_FLAG_DOIT_PERNET_WIP}, + {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETSTATS, .doit = rtnl_stats_get, + .dumpit = rtnl_stats_dump}, + {.msgtype = RTM_SETSTATS, .doit = rtnl_stats_set}, + {.msgtype = RTM_NEWLINKPROP, .doit = rtnl_newlinkprop}, + {.msgtype = RTM_DELLINKPROP, .doit = rtnl_dellinkprop}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETLINK, + .dumpit = rtnl_bridge_getlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELLINK, + .doit = rtnl_bridge_dellink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_SETLINK, + .doit = rtnl_bridge_setlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWNEIGH, .doit = rtnl_fdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELNEIGH, .doit = rtnl_fdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETNEIGH, .doit = rtnl_fdb_get, + .dumpit = rtnl_fdb_dump}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWMDB, .doit = rtnl_mdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELMDB, .doit = rtnl_mdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETMDB, .doit = rtnl_mdb_get, + .dumpit = rtnl_mdb_dump}, +}; + void __init rtnetlink_init(void) { if (register_pernet_subsys(&rtnetlink_net_ops)) @@ -6772,34 +7052,5 @@ void __init rtnetlink_init(void) register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, - rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); - 
- rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0); - - rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, - RTNL_FLAG_BULK_DEL_SUPPORTED); - rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); - - rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); - rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, - 0); - rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); - - rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); - rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, - RTNL_FLAG_BULK_DEL_SUPPORTED); + rtnl_register_many(rtnetlink_rtnl_msg_handlers); } diff --git a/net/core/rtnl_net_debug.c b/net/core/rtnl_net_debug.c new file mode 100644 index 000000000000..f406045cbd0e --- /dev/null +++ b/net/core/rtnl_net_debug.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/notifier.h> +#include <linux/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +static int rtnl_net_debug_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + enum netdev_cmd cmd = event; + + /* Keep enum and don't add default to trigger -Werror=switch */ + switch (cmd) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_REBOOT: + case NETDEV_CHANGE: + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + case NETDEV_CHANGEMTU: + case NETDEV_CHANGEADDR: + case NETDEV_PRE_CHANGEADDR: + case NETDEV_GOING_DOWN: + case NETDEV_CHANGENAME: + case NETDEV_FEAT_CHANGE: + case NETDEV_BONDING_FAILOVER: + case NETDEV_PRE_UP: + case NETDEV_PRE_TYPE_CHANGE: + case NETDEV_POST_TYPE_CHANGE: + case NETDEV_POST_INIT: + case NETDEV_PRE_UNINIT: + case NETDEV_RELEASE: + case NETDEV_NOTIFY_PEERS: + case NETDEV_JOIN: + case NETDEV_CHANGEUPPER: + case NETDEV_RESEND_IGMP: + case NETDEV_PRECHANGEMTU: + case NETDEV_CHANGEINFODATA: + case NETDEV_BONDING_INFO: + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGELOWERSTATE: + case NETDEV_UDP_TUNNEL_PUSH_INFO: + case NETDEV_UDP_TUNNEL_DROP_INFO: + case NETDEV_CHANGE_TX_QUEUE_LEN: + case NETDEV_CVLAN_FILTER_PUSH_INFO: + case NETDEV_CVLAN_FILTER_DROP_INFO: + case NETDEV_SVLAN_FILTER_PUSH_INFO: + case NETDEV_SVLAN_FILTER_DROP_INFO: + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + case NETDEV_XDP_FEAT_CHANGE: + ASSERT_RTNL(); + break; + + /* Once an event fully supports RTNL_NET, move it here + * and remove "if (0)" below. + * + * case NETDEV_XXX: + * ASSERT_RTNL_NET(net); + * break; + */ + } + + /* Just to avoid unused-variable error for dev and net. 
*/ + if (0) + ASSERT_RTNL_NET(net); + + return NOTIFY_DONE; +} + +static int rtnl_net_debug_net_id; + +static int __net_init rtnl_net_debug_net_init(struct net *net) +{ + struct notifier_block *nb; + + nb = net_generic(net, rtnl_net_debug_net_id); + nb->notifier_call = rtnl_net_debug_event; + + return register_netdevice_notifier_net(net, nb); +} + +static void __net_exit rtnl_net_debug_net_exit(struct net *net) +{ + struct notifier_block *nb; + + nb = net_generic(net, rtnl_net_debug_net_id); + unregister_netdevice_notifier_net(net, nb); +} + +static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = { + .init = rtnl_net_debug_net_init, + .exit = rtnl_net_debug_net_exit, + .id = &rtnl_net_debug_net_id, + .size = sizeof(struct notifier_block), +}; + +static struct notifier_block rtnl_net_debug_block = { + .notifier_call = rtnl_net_debug_event, +}; + +static int __init rtnl_net_debug_init(void) +{ + int ret; + + ret = register_pernet_device(&rtnl_net_debug_net_ops); + if (ret) + return ret; + + ret = register_netdevice_notifier(&rtnl_net_debug_block); + if (ret) + unregister_pernet_device(&rtnl_net_debug_net_ops); + + return ret; +} + +subsys_initcall(rtnl_net_debug_init); diff --git a/net/core/skb_fault_injection.c b/net/core/skb_fault_injection.c new file mode 100644 index 000000000000..4235db6bdfad --- /dev/null +++ b/net/core/skb_fault_injection.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/debugfs.h> +#include <linux/fault-inject.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +static struct { + struct fault_attr attr; + char devname[IFNAMSIZ]; + bool filtered; +} skb_realloc = { + .attr = FAULT_ATTR_INITIALIZER, + .filtered = false, +}; + +static bool should_fail_net_realloc_skb(struct sk_buff *skb) +{ + struct net_device *net = skb->dev; + + if (skb_realloc.filtered && + strncmp(net->name, skb_realloc.devname, IFNAMSIZ)) + /* device name filter set, but names do not match */ + return false; + + if (!should_fail(&skb_realloc.attr, 1)) + return false; + + return true; +} +ALLOW_ERROR_INJECTION(should_fail_net_realloc_skb, TRUE); + +void skb_might_realloc(struct sk_buff *skb) +{ + if (!should_fail_net_realloc_skb(skb)) + return; + + pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} +EXPORT_SYMBOL(skb_might_realloc); + +static int __init fail_skb_realloc_setup(char *str) +{ + return setup_fault_attr(&skb_realloc.attr, str); +} +__setup("fail_skb_realloc=", fail_skb_realloc_setup); + +static void reset_settings(void) +{ + skb_realloc.filtered = false; + memset(&skb_realloc.devname, 0, IFNAMSIZ); +} + +static ssize_t devname_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + ssize_t ret; + + reset_settings(); + ret = simple_write_to_buffer(&skb_realloc.devname, IFNAMSIZ, + ppos, buffer, count); + if (ret < 0) + return ret; + + skb_realloc.devname[IFNAMSIZ - 1] = '\0'; + /* Remove a possible \n at the end of devname */ + strim(skb_realloc.devname); + + if (strnlen(skb_realloc.devname, IFNAMSIZ)) + skb_realloc.filtered = true; + + return count; +} + +static ssize_t devname_read(struct file *file, + char __user *buffer, + size_t size, loff_t *ppos) +{ + if (!skb_realloc.filtered) + return 0; + + return simple_read_from_buffer(buffer, size, ppos, &skb_realloc.devname, + strlen(skb_realloc.devname)); +} + +static const struct file_operations devname_ops = { + .write = devname_write, + .read = devname_read, +}; + +static int __init fail_skb_realloc_debugfs(void) { umode_t mode = S_IFREG | 0600; struct 
dentry *dir; + + dir = fault_create_debugfs_attr("fail_skb_realloc", NULL, + &skb_realloc.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + debugfs_create_file("devname", mode, dir, NULL, &devname_ops); + + return 0; +} + +late_initcall(fail_skb_realloc_debugfs); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 74149dc4ee31..6841e61a6bd0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -753,14 +753,14 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc(nc, len, gfp_mask); - pfmemalloc = nc->pfmemalloc; + pfmemalloc = page_frag_cache_is_pfmemalloc(nc); } else { local_bh_disable(); local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); - pfmemalloc = nc->pfmemalloc; + pfmemalloc = page_frag_cache_is_pfmemalloc(nc); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); local_bh_enable(); @@ -850,7 +850,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) len = SKB_HEAD_ALIGN(len); data = page_frag_alloc(&nc->page, len, gfp_mask); - pfmemalloc = nc->page.pfmemalloc; + pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); } local_unlock_nested_bh(&napi_alloc_cache.bh_lock); @@ -5506,7 +5506,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; - if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) + if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data))) return true; read_lock_bh(&sk->sk_callback_lock); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index b1dcbd3be89e..e90fbab703b2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1117,9 +1117,9 @@ static void sk_psock_strp_data_ready(struct sock *sk) if (tls_sw_has_ctx_rx(sk)) { psock->saved_data_ready(sk); } else { - write_lock_bh(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); strp_data_ready(&psock->strp); - write_unlock_bh(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); diff --git a/net/core/sock.c b/net/core/sock.c index da50df485090..74729d20cd00 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -286,8 +286,6 @@ EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; -int sysctl_tstamp_allow_data __read_mostly = 1; - DEFINE_STATIC_KEY_FALSE(memalloc_socks_key); EXPORT_SYMBOL_GPL(memalloc_socks_key); @@ -822,14 +820,11 @@ EXPORT_SYMBOL(sock_set_sndtimeo); static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) { + sock_valbool_flag(sk, SOCK_RCVTSTAMP, val); + sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, val && ns); if (val) { sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new); - sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns); - sock_set_flag(sk, SOCK_RCVTSTAMP); sock_enable_timestamp(sk, SOCK_TIMESTAMP); - } else { - sock_reset_flag(sk, SOCK_RCVTSTAMP); - sock_reset_flag(sk, SOCK_RCVTSTAMPNS); } } @@ -2600,14 +2595,11 @@ void __sock_wfree(struct sk_buff *skb) void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); - skb->sk = sk; #ifdef CONFIG_INET - if (unlikely(!sk_fullsock(sk))) { - skb->destructor = sock_edemux; - sock_hold(sk); - return; - } + if (unlikely(!sk_fullsock(sk))) + return skb_set_owner_edemux(skb, sk); #endif + skb->sk = sk; skb->destructor = sock_wfree; skb_set_hash_from_sk(skb, sk); /* @@ -2905,6 +2897,8 @@ int __sock_cmsg_send(struct 
sock *sk, struct cmsghdr *cmsg, { u32 tsflags; + BUILD_BUG_ON(SOF_TIMESTAMPING_LAST == (1 << 31)); + switch (cmsg->cmsg_type) { case SO_MARK: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && @@ -2933,6 +2927,17 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, return -EINVAL; sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); break; + case SCM_TS_OPT_ID: + if (sk_is_tcp(sk)) + return -EINVAL; + tsflags = READ_ONCE(sk->sk_tsflags); + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) + return -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); + sockc->tsflags |= SOCKCM_FLAG_TS_OPT_ID; + break; /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ case SCM_RIGHTS: case SCM_CREDENTIALS: @@ -3825,9 +3830,6 @@ void sk_common_release(struct sock *sk) sk->sk_prot->unhash(sk); - if (sk->sk_socket) - sk->sk_socket->sk = NULL; - /* * At this point socket cannot receive new packets, but it is possible * that some packets are in flight because some CPU runs receiver and diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 86a2476678c4..cb8d32e5c14e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -51,29 +51,45 @@ int sysctl_devconf_inherit_init_net __read_mostly; EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS) -static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, - struct cpumask *mask) +static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, + struct cpumask *mask) { - char kbuf[128]; + char *kbuf; int len; if (*ppos || !*lenp) { *lenp = 0; - return; + return 0; + } + + /* CPUs are displayed as a hex bitmap + a comma between each group of 8 + * nibbles (except the last one which has a newline instead). + * Guesstimate the buffer size at the group granularity level. + */ + len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp); + kbuf = kmalloc(len, GFP_KERNEL); + if (!kbuf) { + *lenp = 0; + return -ENOMEM; } - len = min(sizeof(kbuf) - 1, *lenp); len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); if (!len) { *lenp = 0; - return; + goto free_buf; } - if (len < *lenp) - kbuf[len++] = '\n'; + /* scnprintf writes a trailing null char not counted in the returned + * length, override it with a newline. + */ + kbuf[len++] = '\n'; memcpy(buffer, kbuf, len); *lenp = len; *ppos += len; + +free_buf: + kfree(kbuf); + return 0; } #endif @@ -117,8 +133,8 @@ static int rps_default_mask_sysctl(const struct ctl_table *table, int write, if (err) goto done; } else { - dump_cpumask(buffer, lenp, ppos, - net->core.rps_default_mask ? : cpu_none_mask); + err = dump_cpumask(buffer, lenp, ppos, + net->core.rps_default_mask ? 
: cpu_none_mask); } done: @@ -247,7 +263,7 @@ write_unlock: } rcu_read_unlock(); - dump_cpumask(buffer, lenp, ppos, mask); + ret = dump_cpumask(buffer, lenp, ppos, mask); } done: @@ -491,15 +507,6 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "tstamp_allow_data", - .data = &sysctl_tstamp_allow_data, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE - }, #ifdef CONFIG_RPS { .procname = "rps_sock_flow_entries", @@ -665,6 +672,15 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, + { + .procname = "tstamp_allow_data", + .data = &init_net.core.sysctl_tstamp_allow_data, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, /* sysctl_core_net_init() will set the values after this * to readonly in network namespaces */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 2e6b8c8fd2de..03eb1d941fca 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2408,6 +2408,11 @@ static struct notifier_block dcbnl_nb __read_mostly = { .notifier_call = dcbnl_netdevice_event, }; +static const struct rtnl_msg_handler dcbnl_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_GETDCB, .doit = dcb_doit}, + {.msgtype = RTM_SETDCB, .doit = dcb_doit}, +}; + static int __init dcbnl_init(void) { int err; @@ -2416,8 +2421,7 @@ static int __init dcbnl_init(void) if (err) return err; - rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); + rtnl_register_many(dcbnl_rtnl_msg_handlers); return 0; } diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 13c73f50da3d..d6e3db300acb 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -531,10 +531,8 @@ int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info) return err; } - if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION]) - action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]); - else - action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT; + action = nla_get_u8_default(info->attrs[DEVLINK_ATTR_RELOAD_ACTION], + DEVLINK_RELOAD_ACTION_DRIVER_REINIT); if (!devlink_reload_action_is_supported(devlink, action)) { NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver"); @@ -971,14 +969,14 @@ static int devlink_nl_flash_update_fill(struct sk_buff *msg, nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, params->component)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE, - params->done, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE, + params->done)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL, - params->total, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL, + params->total)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, - params->timeout, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, + params->timeout)) goto nla_put_failure; out: diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index c7a8e13f917c..14eaad9cfe35 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -166,7 +166,7 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, static inline struct 
devlink_nl_dump_state * devlink_dump_state(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state); + NL_ASSERT_CTX_FITS(struct devlink_nl_dump_state); return (struct devlink_nl_dump_state *)cb->ctx; } @@ -181,6 +181,11 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return 0; } +static inline int devlink_nl_put_u64(struct sk_buff *msg, int attrtype, u64 val) +{ + return nla_put_u64_64bit(msg, attrtype, val, DEVLINK_ATTR_PAD); +} + int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c index 55009b377447..e55701b007f0 100644 --- a/net/devlink/dpipe.c +++ b/net/devlink/dpipe.c @@ -165,18 +165,17 @@ static int devlink_dpipe_table_put(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) || - nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size, - DEVLINK_ATTR_PAD)) + devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size)) goto nla_put_failure; if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, table->counters_enabled)) goto nla_put_failure; if (table->resource_valid) { - if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, - table->resource_id, DEVLINK_ATTR_PAD) || - nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS, - table->resource_units, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, + table->resource_id) || + devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS, + table->resource_units)) goto nla_put_failure; } if (devlink_dpipe_matches_put(table, skb)) @@ -403,12 +402,11 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb, if (!entry_attr) return -EMSGSIZE; - if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index, - DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index)) goto nla_put_failure; if (entry->counter_valid) - if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, - entry->counter, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, + entry->counter)) goto nla_put_failure; matches_attr = nla_nest_start_noflag(skb, diff --git a/net/devlink/health.c b/net/devlink/health.c index acb8c0e174bb..b8d3084e6fe0 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -287,29 +287,27 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, reporter->health_state)) goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, - reporter->error_count, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, + reporter->error_count)) goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, - reporter->recovery_count, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, + reporter->recovery_count)) goto reporter_nest_cancel; if (reporter->ops->recover && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, - reporter->graceful_period, - DEVLINK_ATTR_PAD)) + devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period)) goto reporter_nest_cancel; if (reporter->ops->recover && nla_put_u8(msg, 
DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, reporter->auto_recover)) goto reporter_nest_cancel; if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, - jiffies_to_msecs(reporter->dump_ts), - DEVLINK_ATTR_PAD)) + devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts))) goto reporter_nest_cancel; if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, - reporter->dump_real_ts, DEVLINK_ATTR_PAD)) + devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, + reporter->dump_real_ts)) goto reporter_nest_cancel; if (reporter->ops->dump && nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, @@ -963,8 +961,7 @@ devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb) case NLA_U32: return nla_put_u32(skb, attrtype, *(u32 *)msg->value); case NLA_U64: - return nla_put_u64_64bit(skb, attrtype, *(u64 *)msg->value, - DEVLINK_ATTR_PAD); + return devlink_nl_put_u64(skb, attrtype, *(u64 *)msg->value); case NLA_NUL_STRING: return nla_put_string(skb, attrtype, (char *)&msg->value); case NLA_BINARY: diff --git a/net/devlink/rate.c b/net/devlink/rate.c index 7139e67e93ae..8828ffaf6cbc 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -108,12 +108,12 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, goto nla_put_failure; } - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE, - devlink_rate->tx_share, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_SHARE, + devlink_rate->tx_share)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX, - devlink_rate->tx_max, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_MAX, + devlink_rate->tx_max)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY, diff --git a/net/devlink/region.c b/net/devlink/region.c index 7319127c5913..63fb297f6d67 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -77,7 +77,7 @@ static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT); if (!snap_attr) - return -EINVAL; + return -EMSGSIZE; err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id); if (err) @@ -102,7 +102,7 @@ static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg, snapshots_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOTS); if (!snapshots_attr) - return -EINVAL; + return -EMSGSIZE; list_for_each_entry(snapshot, ®ion->snapshot_list, list) { err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot); @@ -145,9 +145,7 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink, if (err) goto nla_put_failure; - err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE, - region->size, - DEVLINK_ATTR_PAD); + err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE, region->size); if (err) goto nla_put_failure; @@ -210,8 +208,8 @@ devlink_nl_region_notify_build(struct devlink_region *region, if (err) goto out_cancel_msg; } else { - err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE, - region->size, DEVLINK_ATTR_PAD); + err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE, + region->size); if (err) goto out_cancel_msg; } @@ -773,8 +771,7 @@ static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg, if (err) goto nla_put_failure; - err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr, - DEVLINK_ATTR_PAD); + err = devlink_nl_put_u64(msg, 
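/* The -EINVAL -> -EMSGSIZE fixes in region.c above follow the netlink
 * convention: nla_nest_start_noflag() returns NULL only when the skb has
 * run out of tailroom, which is a message-size condition rather than a
 * bad argument. Sketch (function name is illustrative):
 */
static int sample_nest_open(struct sk_buff *msg)
{
	struct nlattr *attr;

	attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
	if (!attr)
		return -EMSGSIZE;	/* no tailroom; caller retries with a bigger buffer */
	nla_nest_end(msg, attr);
	return 0;
}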
DEVLINK_ATTR_REGION_CHUNK_ADDR, addr); if (err) goto nla_put_failure; diff --git a/net/devlink/resource.c b/net/devlink/resource.c index 594c8aeb3bfa..2d6324f3d91f 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -141,12 +141,12 @@ devlink_resource_size_params_put(struct devlink_resource *resource, struct devlink_resource_size_params *size_params; size_params = &resource->size_params; - if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, - size_params->size_granularity, DEVLINK_ATTR_PAD) || - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, - size_params->size_max, DEVLINK_ATTR_PAD) || - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, - size_params->size_min, DEVLINK_ATTR_PAD) || + if (devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, + size_params->size_granularity) || + devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, + size_params->size_max) || + devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, + size_params->size_min) || nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit)) return -EMSGSIZE; return 0; @@ -157,9 +157,8 @@ static int devlink_resource_occ_put(struct devlink_resource *resource, { if (!resource->occ_get) return 0; - return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC, - resource->occ_get(resource->occ_get_priv), - DEVLINK_ATTR_PAD); + return devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_OCC, + resource->occ_get(resource->occ_get_priv)); } static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, @@ -174,14 +173,12 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, return -EMSGSIZE; if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) || - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size, - DEVLINK_ATTR_PAD) || - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id, - DEVLINK_ATTR_PAD)) + devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size) || + devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id)) goto nla_put_failure; if (resource->size != resource->size_new && - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, - resource->size_new, DEVLINK_ATTR_PAD)) + devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, + resource->size_new)) goto nla_put_failure; if (devlink_resource_occ_put(resource, skb)) goto nla_put_failure; @@ -348,7 +345,7 @@ int devl_resource_register(struct devlink *devlink, resource = devlink_resource_find(devlink, NULL, resource_id); if (resource) - return -EINVAL; + return -EEXIST; resource = kzalloc(sizeof(*resource), GFP_KERNEL); if (!resource) @@ -384,39 +381,6 @@ int devl_resource_register(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devl_resource_register); -/** - * devlink_resource_register - devlink resource register - * - * @devlink: devlink - * @resource_name: resource's name - * @resource_size: resource's size - * @resource_id: resource's id - * @parent_resource_id: resource's parent id - * @size_params: size parameters - * - * Generic resources should reuse the same names across drivers. - * Please see the generic resources list at: - * Documentation/networking/devlink/devlink-resource.rst - * - * Context: Takes and release devlink->lock <mutex>. 
- */ -int devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) -{ - int err; - - devl_lock(devlink); - err = devl_resource_register(devlink, resource_name, resource_size, - resource_id, parent_resource_id, size_params); - devl_unlock(devlink); - return err; -} -EXPORT_SYMBOL_GPL(devlink_resource_register); - static void devlink_resource_unregister(struct devlink *devlink, struct devlink_resource *resource) { @@ -517,28 +481,6 @@ void devl_resource_occ_get_register(struct devlink *devlink, EXPORT_SYMBOL_GPL(devl_resource_occ_get_register); /** - * devlink_resource_occ_get_register - register occupancy getter - * - * @devlink: devlink - * @resource_id: resource id - * @occ_get: occupancy getter callback - * @occ_get_priv: occupancy getter callback priv - * - * Context: Takes and release devlink->lock <mutex>. - */ -void devlink_resource_occ_get_register(struct devlink *devlink, - u64 resource_id, - devlink_resource_occ_get_t *occ_get, - void *occ_get_priv) -{ - devl_lock(devlink); - devl_resource_occ_get_register(devlink, resource_id, - occ_get, occ_get_priv); - devl_unlock(devlink); -} -EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register); - -/** * devl_resource_occ_get_unregister - unregister occupancy getter * * @devlink: devlink @@ -560,20 +502,3 @@ void devl_resource_occ_get_unregister(struct devlink *devlink, resource->occ_get_priv = NULL; } EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister); - -/** - * devlink_resource_occ_get_unregister - unregister occupancy getter - * - * @devlink: devlink - * @resource_id: resource id - * - * Context: Takes and release devlink->lock <mutex>. - */ -void devlink_resource_occ_get_unregister(struct devlink *devlink, - u64 resource_id) -{ - devl_lock(devlink); - devl_resource_occ_get_unregister(devlink, resource_id); - devl_unlock(devlink); -} -EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister); diff --git a/net/devlink/trap.c b/net/devlink/trap.c index 5d18c7424df1..f36087f90db5 100644 --- a/net/devlink/trap.c +++ b/net/devlink/trap.c @@ -189,14 +189,12 @@ devlink_trap_group_stats_put(struct sk_buff *msg, if (!attr) return -EMSGSIZE; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - u64_stats_read(&stats.rx_packets), - DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_PACKETS, + u64_stats_read(&stats.rx_packets))) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - u64_stats_read(&stats.rx_bytes), - DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_BYTES, + u64_stats_read(&stats.rx_bytes))) goto nla_put_failure; nla_nest_end(msg, attr); @@ -231,18 +229,15 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink, return -EMSGSIZE; if (devlink->ops->trap_drop_counter_get && - nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops, - DEVLINK_ATTR_PAD)) + devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, - u64_stats_read(&stats.rx_packets), - DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_PACKETS, + u64_stats_read(&stats.rx_packets))) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, - u64_stats_read(&stats.rx_bytes), - DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_BYTES, + u64_stats_read(&stats.rx_bytes))) goto 
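/* The devlink_resource_register() and devlink_resource_occ_get_*()
 * wrappers removed above were nothing more than the devl_ variants
 * bracketed by the instance lock; remaining callers open-code that shape
 * (as the DSA conversion below does). Sketch, assuming the devl_ API:
 */
static int sample_locked_register(struct devlink *devlink, const char *name,
				  u64 size, u64 id, u64 parent_id,
				  const struct devlink_resource_size_params *params)
{
	int err;

	devl_lock(devlink);
	err = devl_resource_register(devlink, name, size, id, parent_id, params);
	devl_unlock(devlink);
	return err;
}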
nla_put_failure; nla_nest_end(msg, attr); @@ -750,8 +745,7 @@ devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink, if (!attr) return -EMSGSIZE; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops, - DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops)) goto nla_put_failure; nla_nest_end(msg, attr); @@ -783,12 +777,12 @@ devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink, policer_item->policer->id)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE, - policer_item->rate, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_TRAP_POLICER_RATE, + policer_item->rate)) goto nla_put_failure; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST, - policer_item->burst, DEVLINK_ATTR_PAD)) + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_TRAP_POLICER_BURST, + policer_item->burst)) goto nla_put_failure; err = devlink_trap_policer_stats_put(msg, devlink, diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c index 0aac887d0098..f41f9fc2194e 100644 --- a/net/dsa/devlink.c +++ b/net/dsa/devlink.c @@ -229,10 +229,15 @@ int dsa_devlink_resource_register(struct dsa_switch *ds, u64 parent_resource_id, const struct devlink_resource_size_params *size_params) { - return devlink_resource_register(ds->devlink, resource_name, - resource_size, resource_id, - parent_resource_id, - size_params); + int ret; + + devl_lock(ds->devlink); + ret = devl_resource_register(ds->devlink, resource_name, resource_size, + resource_id, parent_resource_id, + size_params); + devl_unlock(ds->devlink); + + return ret; } EXPORT_SYMBOL_GPL(dsa_devlink_resource_register); @@ -247,15 +252,19 @@ void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds, devlink_resource_occ_get_t *occ_get, void *occ_get_priv) { - return devlink_resource_occ_get_register(ds->devlink, resource_id, - occ_get, occ_get_priv); + devl_lock(ds->devlink); + devl_resource_occ_get_register(ds->devlink, resource_id, occ_get, + occ_get_priv); + devl_unlock(ds->devlink); } EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register); void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, u64 resource_id) { - devlink_resource_occ_get_unregister(ds->devlink, resource_id); + devl_lock(ds->devlink); + devl_resource_occ_get_unregister(ds->devlink, resource_id); + devl_unlock(ds->devlink); } EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1664547deffd..5a7c0e565a89 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1505,14 +1505,6 @@ static int dsa_switch_probe(struct dsa_switch *ds) if (!ds->num_ports) return -EINVAL; - if (ds->phylink_mac_ops) { - if (ds->ops->phylink_mac_select_pcs || - ds->ops->phylink_mac_config || - ds->ops->phylink_mac_link_down || - ds->ops->phylink_mac_link_up) - return -EINVAL; - } - if (np) { err = dsa_switch_parse_of(ds, np); if (err) diff --git a/net/dsa/port.c b/net/dsa/port.c index 25258b33e59e..ee0aaec4c8e0 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1575,44 +1575,16 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, cpu_dp->tag_ops = tag_ops; } -static struct phylink_pcs * -dsa_port_phylink_mac_select_pcs(struct phylink_config *config, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP); - struct dsa_switch *ds = dp->ds; - - if (ds->ops->phylink_mac_select_pcs) - pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface); - - return 
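/* With the legacy ds->ops->phylink_mac_* hooks removed above, DSA drivers
 * are expected to provide their own phylink_mac_ops. The empty
 * dsa_port_phylink_mac_config()/_link_down()/_link_up() stubs kept below
 * appear to exist because phylink treats these callbacks as mandatory --
 * a hedged reading of the change, not a statement of the phylink
 * contract. */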
pcs; -} - static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->phylink_mac_config) - return; - - ds->ops->phylink_mac_config(ds, dp->index, mode, state); } static void dsa_port_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->phylink_mac_link_down) - return; - - ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface); } static void dsa_port_phylink_mac_link_up(struct phylink_config *config, @@ -1622,18 +1594,9 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, int speed, int duplex, bool tx_pause, bool rx_pause) { - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->phylink_mac_link_up) - return; - - ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev, - speed, duplex, tx_pause, rx_pause); } static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { - .mac_select_pcs = dsa_port_phylink_mac_select_pcs, .mac_config = dsa_port_phylink_mac_config, .mac_link_down = dsa_port_phylink_mac_link_down, .mac_link_up = dsa_port_phylink_mac_link_up, @@ -1871,9 +1834,6 @@ static void dsa_shared_port_link_down(struct dsa_port *dp) if (ds->phylink_mac_ops && ds->phylink_mac_ops->mac_link_down) ds->phylink_mac_ops->mac_link_down(&dp->pl_config, MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); - else if (ds->ops->phylink_mac_link_down) - ds->ops->phylink_mac_link_down(ds, dp->index, MLO_AN_FIXED, - PHY_INTERFACE_MODE_NA); } int dsa_shared_port_link_register_of(struct dsa_port *dp) diff --git a/net/dsa/user.c b/net/dsa/user.c index 64f660d2334b..06c30a9e29ff 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1042,15 +1042,12 @@ static void dsa_user_get_strings(struct net_device *dev, struct dsa_switch *ds = dp->ds; if (stringset == ETH_SS_STATS) { - int len = ETH_GSTRING_LEN; - - strscpy_pad(data, "tx_packets", len); - strscpy_pad(data + len, "tx_bytes", len); - strscpy_pad(data + 2 * len, "rx_packets", len); - strscpy_pad(data + 3 * len, "rx_bytes", len); + ethtool_puts(&data, "tx_packets"); + ethtool_puts(&data, "tx_bytes"); + ethtool_puts(&data, "rx_packets"); + ethtool_puts(&data, "rx_bytes"); if (ds->ops->get_strings) - ds->ops->get_strings(ds, dp->index, stringset, - data + 4 * len); + ds->ops->get_strings(ds, dp->index, stringset, data); } else if (stringset == ETH_SS_TEST) { net_selftest_get_strings(data); } @@ -1308,8 +1305,7 @@ static int dsa_user_set_pauseparam(struct net_device *dev, } #ifdef CONFIG_NET_POLL_CONTROLLER -static int dsa_user_netpoll_setup(struct net_device *dev, - struct netpoll_info *ni) +static int dsa_user_netpoll_setup(struct net_device *dev) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_user_priv *p = netdev_priv(dev); @@ -1365,7 +1361,7 @@ dsa_user_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) static int dsa_user_add_cls_matchall_mirred(struct net_device *dev, struct tc_cls_matchall_offload *cls, - bool ingress) + bool ingress, bool ingress_target) { struct netlink_ext_ack *extack = cls->common.extack; struct dsa_port *dp = dsa_user_to_port(dev); @@ -1377,11 +1373,19 @@ dsa_user_add_cls_matchall_mirred(struct net_device *dev, struct dsa_port *to_dp; int err; - if (!ds->ops->port_mirror_add) + if 
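/* The dsa_user_get_strings() conversion above relies on ethtool_puts()
 * advancing the cursor, which is why the hand-maintained byte offsets
 * disappear. Minimal sketch of the helper's behavior (assumed to match
 * include/linux/ethtool.h; the name below is illustrative):
 */
static void sample_ethtool_puts(u8 **data, const char *str)
{
	strscpy(*data, str, ETH_GSTRING_LEN);
	*data += ETH_GSTRING_LEN;
}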
(cls->common.protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG_MOD(extack, + "Can only offload \"protocol all\" matchall filter"); + return -EOPNOTSUPP; + } + + if (!ds->ops->port_mirror_add) { + NL_SET_ERR_MSG_MOD(extack, + "Switch does not support mirroring operation"); return -EOPNOTSUPP; + } - if (!flow_action_basic_hw_stats_check(&cls->rule->action, - cls->common.extack)) + if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack)) return -EOPNOTSUPP; act = &cls->rule->action.entries[0]; @@ -1389,10 +1393,30 @@ dsa_user_add_cls_matchall_mirred(struct net_device *dev, if (!act->dev) return -EINVAL; - if (!dsa_user_dev_check(act->dev)) - return -EOPNOTSUPP; - - to_dp = dsa_user_to_port(act->dev); + if (dsa_user_dev_check(act->dev)) { + if (ingress_target) { + /* We can only fulfill this using software assist */ + if (cls->common.skip_sw) { + NL_SET_ERR_MSG_MOD(extack, + "Can only mirred to ingress of DSA user port if filter also runs in software"); + return -EOPNOTSUPP; + } + to_dp = dp->cpu_dp; + } else { + to_dp = dsa_user_to_port(act->dev); + } + } else { + /* Handle mirroring to foreign target ports as a mirror towards + * the CPU. The software tc rule will take the packets from + * there. + */ + if (cls->common.skip_sw) { + NL_SET_ERR_MSG_MOD(extack, + "Can only mirred to CPU if filter also runs in software"); + return -EOPNOTSUPP; + } + to_dp = dp->cpu_dp; + } if (dp->ds != to_dp->ds) { NL_SET_ERR_MSG_MOD(extack, @@ -1447,8 +1471,7 @@ dsa_user_add_cls_matchall_police(struct net_device *dev, return -EOPNOTSUPP; } - if (!flow_action_basic_hw_stats_check(&cls->rule->action, - cls->common.extack)) + if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack)) return -EOPNOTSUPP; list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) { @@ -1486,17 +1509,30 @@ static int dsa_user_add_cls_matchall(struct net_device *dev, struct tc_cls_matchall_offload *cls, bool ingress) { - int err = -EOPNOTSUPP; + const struct flow_action *action = &cls->rule->action; + struct netlink_ext_ack *extack = cls->common.extack; - if (cls->common.protocol == htons(ETH_P_ALL) && - flow_offload_has_one_action(&cls->rule->action) && - cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED) - err = dsa_user_add_cls_matchall_mirred(dev, cls, ingress); - else if (flow_offload_has_one_action(&cls->rule->action) && - cls->rule->action.entries[0].id == FLOW_ACTION_POLICE) - err = dsa_user_add_cls_matchall_police(dev, cls, ingress); + if (!flow_offload_has_one_action(action)) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload matchall filter with more than one action"); + return -EOPNOTSUPP; + } - return err; + switch (action->entries[0].id) { + case FLOW_ACTION_MIRRED: + return dsa_user_add_cls_matchall_mirred(dev, cls, ingress, + false); + case FLOW_ACTION_MIRRED_INGRESS: + return dsa_user_add_cls_matchall_mirred(dev, cls, ingress, + true); + case FLOW_ACTION_POLICE: + return dsa_user_add_cls_matchall_police(dev, cls, ingress); + default: + NL_SET_ERR_MSG_MOD(extack, "Unknown action"); + break; + } + + return -EOPNOTSUPP; } static void dsa_user_del_cls_matchall(struct net_device *dev, diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h index 3e7c293af78c..1e790413db0e 100644 --- a/net/ethtool/cmis.h +++ b/net/ethtool/cmis.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #define ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH 120 +#define ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH 2048 #define ETHTOOL_CMIS_CDB_CMD_PAGE 0x9F #define ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR 0x50 @@ -23,6 +24,7 @@ enum 
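/* Decision table implemented by the dsa_user_add_cls_matchall() switch
 * above, derived from the code rather than new behavior:
 *
 *   FLOW_ACTION_MIRRED          -> hardware mirror toward the target port
 *   FLOW_ACTION_MIRRED_INGRESS  -> needs software assist: hardware can
 *                                  only mirror to the CPU port, so
 *                                  skip_sw filters are rejected
 *   FLOW_ACTION_POLICE          -> hardware policer on the user port
 *   anything else               -> -EOPNOTSUPP with an extack message
 */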
ethtool_cmis_cdb_cmd_id { ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES = 0x0041, ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD = 0x0101, ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL = 0x0103, + ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_EPL = 0x0104, ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD = 0x0107, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE = 0x0109, ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE = 0x010A, @@ -38,6 +40,7 @@ enum ethtool_cmis_cdb_cmd_id { * @resv1: Added to match the CMIS standard request continuity. * @resv2: Added to match the CMIS standard request continuity. * @payload: Payload for the CDB commands. + * @epl: Extended payload for the CDB commands. */ struct ethtool_cmis_cdb_request { __be16 id; @@ -49,6 +52,7 @@ struct ethtool_cmis_cdb_request { u8 resv2; u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH]; ); + u8 *epl; /* Everything above this field checksummed. */ }; #define CDB_F_COMPLETION_VALID BIT(0) @@ -96,13 +100,15 @@ struct ethtool_cmis_cdb_rpl { u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH]; }; -u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs); +u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs); +u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs); void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, - enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl, - u8 lpl_len, u16 max_duration, - u8 read_write_len_ext, u16 msleep_pre_rpl, - u8 rpl_exp_len, u8 flags); + enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl, + u8 lpl_len, u8 *epl, u16 epl_len, + u16 max_duration, u8 read_write_len_ext, + u16 msleep_pre_rpl, u8 rpl_exp_len, + u8 flags); void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags); diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index 4d5581147952..d159dc121bde 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -11,25 +11,41 @@ * min(i, 15) byte octets where i specifies the allowable additional number of * byte octets in a READ or a WRITE. */ -u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs) +u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs) { return 8 * (1 + min_t(u8, num_of_byte_octs, 15)); } +/* For accessing the EPL field on page 9Fh, the allowable length extension is + * min(i, 255) byte octets where i specifies the allowable additional number of + * byte octets in a READ or a WRITE. 
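/* Worked example for the length-extension encoding in
 * ethtool_cmis_get_max_lpl_size() above: a module advertising
 * read_write_len_ext = i may move 8 * (1 + min(i, 15)) bytes per LPL
 * access, so i = 0 -> 8 bytes and i >= 15 -> 128 bytes; the LPL payload
 * area on page 9Fh is additionally capped by callers at
 * ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH (120 bytes). */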
+ */ +u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs) +{ + return 8 * (1 + min_t(u8, num_of_byte_octs, 255)); +} + void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, - enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl, - u8 lpl_len, u16 max_duration, - u8 read_write_len_ext, u16 msleep_pre_rpl, - u8 rpl_exp_len, u8 flags) + enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl, + u8 lpl_len, u8 *epl, u16 epl_len, + u16 max_duration, u8 read_write_len_ext, + u16 msleep_pre_rpl, u8 rpl_exp_len, u8 flags) { args->req.id = cpu_to_be16(cmd); args->req.lpl_len = lpl_len; - if (pl) - memcpy(args->req.payload, pl, args->req.lpl_len); + if (lpl) { + memcpy(args->req.payload, lpl, args->req.lpl_len); + args->read_write_len_ext = + ethtool_cmis_get_max_lpl_size(read_write_len_ext); + } + if (epl) { + args->req.epl_len = cpu_to_be16(epl_len); + args->req.epl = epl; + args->read_write_len_ext = + ethtool_cmis_get_max_epl_size(read_write_len_ext); + } args->max_duration = max_duration; - args->read_write_len_ext = - ethtool_cmis_get_max_payload_size(read_write_len_ext); args->msleep_pre_rpl = msleep_pre_rpl; args->rpl_exp_len = rpl_exp_len; args->flags = flags; @@ -183,7 +199,7 @@ cmis_cdb_validate_password(struct ethtool_cmis_cdb *cdb, } ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS, - (u8 *)&qs_pl, sizeof(qs_pl), 0, + (u8 *)&qs_pl, sizeof(qs_pl), NULL, 0, 0, cdb->read_write_len_ext, 1000, sizeof(*rpl), CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); @@ -245,8 +261,9 @@ static int cmis_cdb_module_features_get(struct ethtool_cmis_cdb *cdb, ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags); ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES, - NULL, 0, 0, cdb->read_write_len_ext, - 1000, sizeof(*rpl), flags); + NULL, 0, NULL, 0, 0, + cdb->read_write_len_ext, 1000, + sizeof(*rpl), flags); err = ethtool_cmis_cdb_execute_cmd(dev, &args); if (err < 0) { @@ -546,6 +563,49 @@ __ethtool_cmis_cdb_execute_cmd(struct net_device *dev, return err; } +#define CMIS_CDB_EPL_PAGE_START 0xA0 +#define CMIS_CDB_EPL_PAGE_END 0xAF +#define CMIS_CDB_EPL_FW_BLOCK_OFFSET_START 128 +#define CMIS_CDB_EPL_FW_BLOCK_OFFSET_END 255 + +static int +ethtool_cmis_cdb_execute_epl_cmd(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args, + struct ethtool_module_eeprom *page_data) +{ + u16 epl_len = be16_to_cpu(args->req.epl_len); + u32 bytes_written = 0; + u8 page; + int err; + + for (page = CMIS_CDB_EPL_PAGE_START; + page <= CMIS_CDB_EPL_PAGE_END && bytes_written < epl_len; page++) { + u16 offset = CMIS_CDB_EPL_FW_BLOCK_OFFSET_START; + + while (offset <= CMIS_CDB_EPL_FW_BLOCK_OFFSET_END && + bytes_written < epl_len) { + u32 bytes_left = epl_len - bytes_written; + u16 space_left, bytes_to_write; + + space_left = CMIS_CDB_EPL_FW_BLOCK_OFFSET_END - offset + 1; + bytes_to_write = min_t(u16, bytes_left, + min_t(u16, space_left, + args->read_write_len_ext)); + + err = __ethtool_cmis_cdb_execute_cmd(dev, page_data, + page, offset, + bytes_to_write, + args->req.epl + bytes_written); + if (err < 0) + return err; + + offset += bytes_to_write; + bytes_written += bytes_to_write; + } + } + return 0; +} + static u8 cmis_cdb_calc_checksum(const void *data, size_t size) { const u8 *bytes = (const u8 *)data; @@ -567,7 +627,9 @@ int ethtool_cmis_cdb_execute_cmd(struct net_device *dev, int err; args->req.chk_code = - cmis_cdb_calc_checksum(&args->req, sizeof(args->req)); + cmis_cdb_calc_checksum(&args->req, + offsetof(struct ethtool_cmis_cdb_request, + epl)); if 
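/* Consistency check for the EPL geometry walked by
 * ethtool_cmis_cdb_execute_epl_cmd() above (sketch; the two derived
 * macros are hypothetical):
 */
#define SAMPLE_EPL_PAGES \
	(CMIS_CDB_EPL_PAGE_END - CMIS_CDB_EPL_PAGE_START + 1)		/* 16 */
#define SAMPLE_EPL_BYTES_PER_PAGE \
	(CMIS_CDB_EPL_FW_BLOCK_OFFSET_END - \
	 CMIS_CDB_EPL_FW_BLOCK_OFFSET_START + 1)			/* 128 */
static_assert(SAMPLE_EPL_PAGES * SAMPLE_EPL_BYTES_PER_PAGE ==
	      ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH);	/* 16 * 128 == 2048 */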
(args->req.lpl_len > args->read_write_len_ext) { args->err_msg = "LPL length is longer than CDB read write length extension allows"; @@ -589,6 +651,12 @@ int ethtool_cmis_cdb_execute_cmd(struct net_device *dev, if (err < 0) return err; + if (args->req.epl_len) { + err = ethtool_cmis_cdb_execute_epl_cmd(dev, args, &page_data); + if (err < 0) + return err; + } + offset = CMIS_CDB_CMD_ID_OFFSET + offsetof(struct ethtool_cmis_cdb_request, id); err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data, diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index 655ff5224ffa..48aef6220f00 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -9,6 +9,7 @@ struct cmis_fw_update_fw_mng_features { u8 start_cmd_payload_size; + u8 write_mechanism; u16 max_duration_start; u16 max_duration_write; u16 max_duration_complete; @@ -36,7 +37,9 @@ struct cmis_cdb_fw_mng_features_rpl { }; enum cmis_cdb_fw_write_mechanism { + CMIS_CDB_FW_WRITE_MECHANISM_NONE = 0x00, CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01, + CMIS_CDB_FW_WRITE_MECHANISM_EPL = 0x10, CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, }; @@ -54,7 +57,8 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags); ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES, - NULL, 0, cdb->max_completion_time, + NULL, 0, NULL, 0, + cdb->max_completion_time, cdb->read_write_len_ext, 1000, sizeof(*rpl), flags); @@ -67,10 +71,9 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, } rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload; - if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL || - rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) { + if (rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_NONE) { ethnl_module_fw_flash_ntf_err(dev, ntf_params, - "Write LPL is not supported", + "CDB write mechanism is not supported", NULL); return -EOPNOTSUPP; } @@ -82,6 +85,10 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, */ cdb->read_write_len_ext = rpl->read_write_len_ext; fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size; + fw_mng->write_mechanism = + rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ? 
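/* Why the checksum above now stops at
 * offsetof(struct ethtool_cmis_cdb_request, epl): the new epl member is a
 * kernel pointer to the extended payload, not wire data, so it falls
 * outside the struct's "everything above this field checksummed" region;
 * summing sizeof(args->req) would have folded a host pointer into
 * chk_code. */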
+ CMIS_CDB_FW_WRITE_MECHANISM_LPL : + CMIS_CDB_FW_WRITE_MECHANISM_EPL; fw_mng->max_duration_start = be16_to_cpu(rpl->max_duration_start); fw_mng->max_duration_write = be16_to_cpu(rpl->max_duration_write); fw_mng->max_duration_complete = be16_to_cpu(rpl->max_duration_complete); @@ -122,7 +129,7 @@ cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD, - (u8 *)&pl, lpl_len, + (u8 *)&pl, lpl_len, NULL, 0, fw_mng->max_duration_start, cdb->read_write_len_ext, 1000, 0, CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); @@ -148,9 +155,9 @@ struct cmis_cdb_write_fw_block_lpl_pl { }; static int -cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb, - struct ethtool_cmis_fw_update_params *fw_update, - struct cmis_fw_update_fw_mng_features *fw_mng) +cmis_fw_update_write_image_lpl(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) { u8 start = fw_mng->start_cmd_payload_size; u32 offset, max_block_size, max_lpl_len; @@ -158,7 +165,7 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb, int err; max_lpl_len = min_t(u32, - ethtool_cmis_get_max_payload_size(cdb->read_write_len_ext), + ethtool_cmis_get_max_lpl_size(cdb->read_write_len_ext), ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH); max_block_size = max_lpl_len - sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl, @@ -183,7 +190,7 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb, ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL, - (u8 *)&pl, lpl_len, + (u8 *)&pl, lpl_len, NULL, 0, fw_mng->max_duration_write, cdb->read_write_len_ext, 1, 0, CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); @@ -201,6 +208,67 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb, return 0; } +struct cmis_cdb_write_fw_block_epl_pl { + u8 fw_block[ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH]; +}; + +static int +cmis_fw_update_write_image_epl(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) +{ + u8 start = fw_mng->start_cmd_payload_size; + u32 image_size = fw_update->fw->size; + u32 offset, lpl_len; + int err; + + lpl_len = sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl, + block_address); + + for (offset = start; offset < image_size; + offset += ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH) { + struct cmis_cdb_write_fw_block_lpl_pl lpl = { + .block_address = cpu_to_be32(offset - start), + }; + struct cmis_cdb_write_fw_block_epl_pl *epl; + struct ethtool_cmis_cdb_cmd_args args = {}; + u32 epl_len; + + ethnl_module_fw_flash_ntf_in_progress(fw_update->dev, + &fw_update->ntf_params, + offset - start, + image_size); + + epl_len = min_t(u32, ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH, + image_size - offset); + epl = kmalloc_array(epl_len, sizeof(u8), GFP_KERNEL); + if (!epl) + return -ENOMEM; + + memcpy(epl->fw_block, &fw_update->fw->data[offset], epl_len); + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_EPL, + (u8 *)&lpl, lpl_len, (u8 *)epl, + epl_len, + fw_mng->max_duration_write, + cdb->read_write_len_ext, 1, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args); + kfree(epl); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Write FW block EPL command failed", + args.err_msg); + return err; + } + } + + return 0; +} + static int cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb, struct 
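/* Net effect of the write-mechanism selection above, keyed on the
 * FW_MANAGMENT_FEATURES reply:
 *
 *   0x00 (none)              -> -EOPNOTSUPP, reported via the flash
 *                               notification path
 *   0x01 (LPL only)          -> firmware blocks go out through the
 *                               120-byte LPL area
 *   0x10 (EPL) / 0x11 (both) -> EPL is preferred: up to 2048 bytes of
 *                               firmware per CDB command
 */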
net_device *dev, @@ -212,7 +280,8 @@ cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb, ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD, - NULL, 0, fw_mng->max_duration_complete, + NULL, 0, NULL, 0, + fw_mng->max_duration_complete, cdb->read_write_len_ext, 1000, 0, CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); @@ -236,9 +305,15 @@ cmis_fw_update_download_image(struct ethtool_cmis_cdb *cdb, if (err < 0) return err; - err = cmis_fw_update_write_image(cdb, fw_update, fw_mng); - if (err < 0) - return err; + if (fw_mng->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL) { + err = cmis_fw_update_write_image_lpl(cdb, fw_update, fw_mng); + if (err < 0) + return err; + } else { + err = cmis_fw_update_write_image_epl(cdb, fw_update, fw_mng); + if (err < 0) + return err; + } err = cmis_fw_update_complete_download(cdb, fw_update->dev, fw_mng, &fw_update->ntf_params); @@ -294,7 +369,7 @@ cmis_fw_update_run_image(struct ethtool_cmis_cdb *cdb, struct net_device *dev, int err; ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE, - (u8 *)&pl, sizeof(pl), + (u8 *)&pl, sizeof(pl), NULL, 0, cdb->max_completion_time, cdb->read_write_len_ext, 1000, 0, CDB_F_MODULE_STATE_VALID); @@ -326,7 +401,8 @@ cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb, ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE, - NULL, 0, cdb->max_completion_time, + NULL, 0, NULL, 0, + cdb->max_completion_time, cdb->read_write_len_ext, 1000, 0, CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); diff --git a/net/ethtool/common.c b/net/ethtool/common.c index dd345efa114b..05ce4f8080b3 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -538,6 +538,20 @@ static int ethtool_get_rxnfc_rule_count(struct net_device *dev) return info.rule_cnt; } +/* Max offset for one RSS context */ +static u32 ethtool_get_rss_ctx_max_channel(struct ethtool_rxfh_context *ctx) +{ + u32 max_ring = 0; + u32 i, *tbl; + + if (WARN_ON_ONCE(!ctx)) + return 0; + tbl = ethtool_rxfh_context_indir(ctx); + for (i = 0; i < ctx->indir_size; i++) + max_ring = max(max_ring, tbl[i]); + return max_ring; +} + static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -574,10 +588,18 @@ static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max) if (rule_info.fs.ring_cookie != RX_CLS_FLOW_DISC && rule_info.fs.ring_cookie != RX_CLS_FLOW_WAKE && - !(rule_info.flow_type & FLOW_RSS) && - !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie)) - max_ring = - max_t(u64, max_ring, rule_info.fs.ring_cookie); + !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie)) { + u64 ring = rule_info.fs.ring_cookie; + + if (rule_info.flow_type & FLOW_RSS) { + struct ethtool_rxfh_context *ctx; + + ctx = xa_load(&dev->ethtool->rss_ctx, + rule_info.rss_context); + ring += ethtool_get_rss_ctx_max_channel(ctx); + } + max_ring = max_t(u64, max_ring, ring); + } } kvfree(info); @@ -589,6 +611,7 @@ err_free_info: return err; } +/* Max offset across all of a device's RSS contexts */ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev) { struct ethtool_rxfh_context *ctx; @@ -596,13 +619,8 @@ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev) u32 max_ring = 0; mutex_lock(&dev->ethtool->rss_lock); - xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { - u32 i, *tbl; - - tbl = ethtool_rxfh_context_indir(ctx); - for (i = 0; i < ctx->indir_size; i++) - max_ring = max(max_ring, tbl[i]); - } + 
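/* Numeric example of the FLOW_RSS accounting added above: an ntuple rule
 * with ring_cookie = 2 steering into an RSS context whose indirection
 * table references queues 0..5 can deliver to queue 2 + 5 = 7, so the
 * channel-count check must treat 7, not 2, as that rule's maximum ring
 * (on NICs that add the cookie and the RSS result together). */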
xa_for_each(&dev->ethtool->rss_ctx, context, ctx) + max_ring = max(max_ring, ethtool_get_rss_ctx_max_channel(ctx)); mutex_unlock(&dev->ethtool->rss_lock); return max_ring; @@ -611,7 +629,7 @@ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev) static u32 ethtool_get_max_rxfh_channel(struct net_device *dev) { struct ethtool_rxfh_param rxfh = {}; - u32 dev_size, current_max; + u32 dev_size, current_max = 0; int ret; /* While we do track whether RSS context has an indirection @@ -684,6 +702,54 @@ int ethtool_check_max_channel(struct net_device *dev, return 0; } +int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxnfc *info; + int rc, i, rule_cnt; + + if (!ops->get_rxnfc) + return 0; + + rule_cnt = ethtool_get_rxnfc_rule_count(dev); + if (!rule_cnt) + return 0; + + if (rule_cnt < 0) + return -EINVAL; + + info = kvzalloc(struct_size(info, rule_locs, rule_cnt), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->cmd = ETHTOOL_GRXCLSRLALL; + info->rule_cnt = rule_cnt; + rc = ops->get_rxnfc(dev, info, info->rule_locs); + if (rc) + goto out_free; + + for (i = 0; i < rule_cnt; i++) { + struct ethtool_rxnfc rule_info = { + .cmd = ETHTOOL_GRXCLSRULE, + .fs.location = info->rule_locs[i], + }; + + rc = ops->get_rxnfc(dev, &rule_info, NULL); + if (rc) + goto out_free; + + if (rule_info.fs.flow_type & FLOW_RSS && + rule_info.rss_context == rss_context) { + rc = -EBUSY; + goto out_free; + } + } + +out_free: + kvfree(info); + return rc; +} + int ethtool_check_ops(const struct ethtool_ops *ops) { if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params)) diff --git a/net/ethtool/common.h b/net/ethtool/common.h index d55d5201b085..4a2de3ce7354 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -47,6 +47,7 @@ bool convert_legacy_settings_to_link_ksettings( int ethtool_check_max_channel(struct net_device *dev, struct ethtool_channels channels, struct genl_info *info); +int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context); int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); extern const struct ethtool_phy_ops *ethtool_phy_ops; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 65cfe76dafbe..61df8ce44379 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -992,6 +992,11 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, if (rc) return rc; + /* Nonzero ring with RSS only makes sense if NIC adds them together */ + if (info.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && + ethtool_get_flow_spec_ring(info.fs.ring_cookie)) + return -EINVAL; + if (ops->get_rxfh) { struct ethtool_rxfh_param rxfh = {}; @@ -1462,6 +1467,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, mutex_lock(&dev->ethtool->rss_lock); locked = true; } + + if (rxfh.rss_context && rxfh_dev.rss_delete) { + ret = ethtool_check_rss_ctx_busy(dev, rxfh.rss_context); + if (ret) + goto out; + } + if (create) { if (rxfh_dev.rss_delete) { ret = -EINVAL; @@ -1505,6 +1517,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, extack); /* Make sure driver populates defaults */ WARN_ON_ONCE(!ret && !rxfh_dev.key && + ops->rxfh_per_ctx_key && !memchr_inv(ethtool_rxfh_context_key(ctx), 0, ctx->key_size)); } else if (rxfh_dev.rss_delete) { diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index e07386275e14..7cb106b590ab 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -224,7 
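/* Resulting deletion contract from ethtool_check_rss_ctx_busy() above: an
 * RSS context still targeted by an ntuple rule cannot be removed. Hedged
 * userspace illustration (rule location 3 and context 1 are made up):
 *
 *   ethtool -X eth0 context 1 delete   # -> EBUSY while rule 3 exists
 *   ethtool -N eth0 delete 3           # drop the FLOW_RSS rule first
 *   ethtool -X eth0 context 1 delete   # now succeeds
 */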
+224,7 @@ struct rss_nl_dump_ctx { static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb) { - NL_ASSERT_DUMP_CTX_FITS(struct rss_nl_dump_ctx); + NL_ASSERT_CTX_FITS(struct rss_nl_dump_ctx); return (struct rss_nl_dump_ctx *)cb->ctx; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index ebdfd5b64e17..9e64496a5c1c 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -515,6 +515,77 @@ static void hsr_change_rx_flags(struct net_device *dev, int change) } } +static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) +{ + bool is_slave_a_added = false; + bool is_slave_b_added = false; + struct hsr_port *port; + struct hsr_priv *hsr; + int ret = 0; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER || + port->type == HSR_PT_INTERLINK) + continue; + + ret = vlan_vid_add(port->dev, proto, vid); + switch (port->type) { + case HSR_PT_SLAVE_A: + if (ret) { + /* clean up Slave-B */ + netdev_err(dev, "add vid failed for Slave-A\n"); + if (is_slave_b_added) + vlan_vid_del(port->dev, proto, vid); + return ret; + } + + is_slave_a_added = true; + break; + + case HSR_PT_SLAVE_B: + if (ret) { + /* clean up Slave-A */ + netdev_err(dev, "add vid failed for Slave-B\n"); + if (is_slave_a_added) + vlan_vid_del(port->dev, proto, vid); + return ret; + } + + is_slave_b_added = true; + break; + default: + break; + } + } + + return 0; +} + +static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) +{ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + vlan_vid_del(port->dev, proto, vid); + break; + default: + break; + } + } + + return 0; +} + static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, @@ -523,6 +594,8 @@ static const struct net_device_ops hsr_device_ops = { .ndo_change_rx_flags = hsr_change_rx_flags, .ndo_fix_features = hsr_fix_features, .ndo_set_rx_mode = hsr_set_rx_mode, + .ndo_vlan_rx_add_vid = hsr_ndo_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = hsr_ndo_vlan_rx_kill_vid, }; static const struct device_type hsr_type = { @@ -569,14 +642,10 @@ void hsr_dev_setup(struct net_device *dev) dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_FILTER; dev->features = dev->hw_features; - - /* VLAN on top of HSR needs testing and probably some work on - * hsr_header_create() etc. - */ - dev->features |= NETIF_F_VLAN_CHALLENGED; } /* Return true if dev is a HSR master; return false otherwise. 
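/* Invariant maintained by hsr_ndo_vlan_rx_add_vid() above: a VID is
 * filtered on both slaves or on neither. A failure on either slave rolls
 * back the VID already programmed on the other before propagating the
 * error, so the redundant pair never disagrees about which VLANs it
 * accepts. */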
@@ -652,6 +721,10 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], (slave[1]->features & NETIF_F_HW_HSR_FWD)) hsr->fwd_offloaded = true; + if ((slave[0]->features & NETIF_F_HW_VLAN_CTAG_FILTER) && + (slave[1]->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + hsr_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + res = register_netdevice(hsr_dev); if (res) goto err_unregister; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index b38060246e62..aa6acebc7c1e 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -280,6 +280,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, struct hsr_port *port, u8 proto_version) { struct hsr_ethhdr *hsr_ethhdr; + unsigned char *pc; int lsdu_size; /* pad to minimum packet size which is 60 + 6 (HSR tag) */ @@ -290,7 +291,18 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, if (frame->is_vlan) lsdu_size -= 4; - hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb); + pc = skb_mac_header(skb); + if (frame->is_vlan) + /* This 4-byte shift (size of a vlan tag) does not + * mean that the ethhdr starts there. But rather it + * provides the proper environment for accessing + * the fields, such as hsr_tag etc., just like + * when the vlan tag is not there. This is because + * the hsr tag is after the vlan tag. + */ + hsr_ethhdr = (struct hsr_ethhdr *)(pc + VLAN_HLEN); + else + hsr_ethhdr = (struct hsr_ethhdr *)pc; hsr_set_path_id(hsr_ethhdr, port); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); @@ -368,7 +380,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, return skb_clone(frame->skb_std, GFP_ATOMIC); } - skb = skb_copy_expand(frame->skb_std, 0, + skb = skb_copy_expand(frame->skb_std, skb_headroom(frame->skb_std), skb_tailroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC); return prp_fill_rct(skb, frame, port); @@ -690,9 +702,6 @@ static int fill_frame_info(struct hsr_frame_info *frame, if (frame->is_vlan) { vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr; proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; - /* FIXME: */ - netdev_warn_once(skb->dev, "VLAN not yet supported"); - return -EINVAL; } frame->is_from_san = false; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index f6ff0b61e08a..b68f2f71d0e1 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -82,10 +82,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; } - if (!data[IFLA_HSR_MULTICAST_SPEC]) - multicast_spec = 0; - else - multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); + multicast_spec = nla_get_u8_default(data[IFLA_HSR_MULTICAST_SPEC], 0); if (data[IFLA_HSR_PROTOCOL]) proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]); @@ -128,9 +125,9 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) { struct hsr_priv *hsr = netdev_priv(dev); - del_timer_sync(&hsr->prune_timer); - del_timer_sync(&hsr->prune_proxy_timer); - del_timer_sync(&hsr->announce_timer); + timer_delete_sync(&hsr->prune_timer); + timer_delete_sync(&hsr->prune_proxy_timer); + timer_delete_sync(&hsr->announce_timer); timer_delete_sync(&hsr->announce_proxy_timer); hsr_debugfs_term(hsr); diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 29bf97640166..74ef0a310afb 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -202,10 +202,7 @@ int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info) addr.pan_id = nla_get_shortaddr( info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); - if (info->attrs[IEEE802154_ATTR_PAGE]) - page = 
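/* Frame layout behind the VLAN_HLEN shift in hsr_fill_tag() above
 * (byte offsets, illustrative):
 *
 *   0        6        12         16         18
 *   | dstMAC | srcMAC | VLAN tag | 0x892F   | HSR tag | payload |
 *
 * Casting (mac + VLAN_HLEN) to struct hsr_ethhdr lands ->h_proto on the
 * HSR ethertype at offset 16 and ->hsr_tag at offset 18 -- exactly where
 * those fields sit once the HSR tag follows a VLAN tag. */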
nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); - else - page = 0; + page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0); ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]), @@ -338,10 +335,7 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); - if (info->attrs[IEEE802154_ATTR_PAGE]) - page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); - else - page = 0; + page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0); if (addr.short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); @@ -388,10 +382,7 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]); - if (info->attrs[IEEE802154_ATTR_PAGE]) - page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); - else - page = 0; + page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0); ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 7eb37de3add2..5a024ca60d35 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1438,22 +1438,18 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) } /* Use current page by default */ - if (info->attrs[NL802154_ATTR_PAGE]) - request->page = nla_get_u8(info->attrs[NL802154_ATTR_PAGE]); - else - request->page = wpan_phy->current_page; + request->page = nla_get_u8_default(info->attrs[NL802154_ATTR_PAGE], + wpan_phy->current_page); /* Scan all supported channels by default */ - if (info->attrs[NL802154_ATTR_SCAN_CHANNELS]) - request->channels = nla_get_u32(info->attrs[NL802154_ATTR_SCAN_CHANNELS]); - else - request->channels = wpan_phy->supported.channels[request->page]; + request->channels = + nla_get_u32_default(info->attrs[NL802154_ATTR_SCAN_CHANNELS], + wpan_phy->supported.channels[request->page]); /* Use maximum duration order by default */ - if (info->attrs[NL802154_ATTR_SCAN_DURATION]) - request->duration = nla_get_u8(info->attrs[NL802154_ATTR_SCAN_DURATION]); - else - request->duration = IEEE802154_MAX_SCAN_DURATION; + request->duration = + nla_get_u8_default(info->attrs[NL802154_ATTR_SCAN_DURATION], + IEEE802154_MAX_SCAN_DURATION); err = rdev_trigger_scan(rdev, request); if (err) { @@ -1598,10 +1594,8 @@ nl802154_send_beacons(struct sk_buff *skb, struct genl_info *info) request->wpan_phy = wpan_phy; /* Use maximum duration order by default */ - if (info->attrs[NL802154_ATTR_BEACON_INTERVAL]) - request->interval = nla_get_u8(info->attrs[NL802154_ATTR_BEACON_INTERVAL]); - else - request->interval = IEEE802154_MAX_SCAN_DURATION; + request->interval = nla_get_u8_default(info->attrs[NL802154_ATTR_BEACON_INTERVAL], + IEEE802154_MAX_SCAN_DURATION); err = rdev_send_beacons(rdev, request); if (err) { diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 990a83455dcf..18d267921bb5 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1043,19 +1043,21 @@ static int ieee802154_create(struct net *net, struct socket *sock, if (sk->sk_prot->hash) { rc = sk->sk_prot->hash(sk); - if (rc) { - sk_common_release(sk); - goto out; - } + if (rc) + goto out_sk_release; } if (sk->sk_prot->init) { rc = sk->sk_prot->init(sk); if (rc) - 
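/* Ordering note for the nl802154_trigger_scan() defaults above:
 * request->page must be resolved before the NL802154_ATTR_SCAN_CHANNELS
 * fallback runs, because that default indexes
 * wpan_phy->supported.channels[request->page]. */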
sk_common_release(sk); + goto out_sk_release; } out: return rc; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static const struct net_proto_family ieee802154_family_ops = { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b24d74616637..8095e82de808 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -376,32 +376,30 @@ lookup_protocol: inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 11c1519b3699..cb9a7ed8abd3 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1215,7 +1215,7 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) NEIGH_UPDATE_F_ADMIN, 0); write_lock_bh(&tbl->lock); neigh_release(neigh); - neigh_remove_one(neigh, tbl); + neigh_remove_one(neigh); write_unlock_bh(&tbl->lock); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7cf5f7d0d0de..c8b3cf5fba4c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -119,11 +119,9 @@ struct inet_fill_args { #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) -static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; - static u32 inet_addr_hash(const struct net *net, __be32 addr) { - u32 val = (__force u32) addr ^ net_hash_mix(net); + u32 val = __ipv4_addr_hash(addr, net_hash_mix(net)); return hash_32(val, IN4_ADDR_HSIZE_SHIFT); } @@ -133,13 +131,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) u32 hash = inet_addr_hash(net, ifa->ifa_local); ASSERT_RTNL(); - hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]); } static void inet_hash_remove(struct in_ifaddr *ifa) { ASSERT_RTNL(); - hlist_del_init_rcu(&ifa->hash); + hlist_del_init_rcu(&ifa->addr_lst); } /** @@ -186,9 +184,8 @@ struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr) u32 hash = inet_addr_hash(net, addr); struct in_ifaddr *ifa; - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) - if (ifa->ifa_local == addr && - net_eq(dev_net(ifa->ifa_dev->dev), net)) + hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst) + if (ifa->ifa_local == addr) return ifa; return NULL; @@ -227,7 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev) in_dev_hold(in_dev); ifa->ifa_dev = in_dev; - INIT_HLIST_NODE(&ifa->hash); + INIT_HLIST_NODE(&ifa->addr_lst); return ifa; } @@ -499,26 +496,18 @@ static void inet_del_ifa(struct in_device *in_dev, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } -static void check_lifetime(struct work_struct *work); - -static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); - static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid, struct netlink_ext_ack *extack) { struct in_ifaddr __rcu **last_primary, **ifap; struct in_device *in_dev = ifa->ifa_dev; + struct net *net = dev_net(in_dev->dev); struct in_validator_info ivi; struct in_ifaddr *ifa1; int ret; 
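/* Shape shared by the ieee802154 and inet create() error paths above:
 * once sk_common_release() has run, the half-initialized socket must not
 * keep pointing at the released sk, or the generic teardown would touch
 * freed state. Sketch of the pattern:
 *
 *   out_sk_release:
 *	sk_common_release(sk);
 *	sock->sk = NULL;
 *	goto out;
 */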
ASSERT_RTNL(); - if (!ifa->ifa_local) { - inet_free_ifa(ifa); - return 0; - } - ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; @@ -576,8 +565,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); - cancel_delayed_work(&check_lifetime_work); - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); + cancel_delayed_work(&net->ipv4.addr_chk_work); + queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0); /* Send message first, then call notifier. Notifier will trigger FIB update, so that @@ -590,14 +579,17 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, static int inet_insert_ifa(struct in_ifaddr *ifa) { + if (!ifa->ifa_local) { + inet_free_ifa(ifa); + return 0; + } + return __inet_insert_ifa(ifa, NULL, 0, NULL); } static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { - struct in_device *in_dev = __in_dev_get_rtnl(dev); - - ASSERT_RTNL(); + struct in_device *in_dev = __in_dev_get_rtnl_net(dev); ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); @@ -651,7 +643,7 @@ static int ip_mc_autojoin_config(struct net *net, bool join, struct sock *sk = net->ipv4.mc_autojoin_sk; int ret; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); lock_sock(sk); if (join) @@ -677,22 +669,24 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, struct in_ifaddr *ifa; int err; - ASSERT_RTNL(); - err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) - goto errout; + goto out; ifm = nlmsg_data(nlh); + + rtnl_net_lock(net); + in_dev = inetdev_by_index(net, ifm->ifa_index); if (!in_dev) { NL_SET_ERR_MSG(extack, "ipv4: Device not found"); err = -ENODEV; - goto errout; + goto unlock; } - for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL; + for (ifap = &in_dev->ifa_list; + (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) @@ -708,13 +702,16 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, if (ipv4_is_multicast(ifa->ifa_address)) ip_mc_autojoin_config(net, false, ifa); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); - return 0; + goto unlock; } NL_SET_ERR_MSG(extack, "ipv4: Address not found"); err = -EADDRNOTAVAIL; -errout: +unlock: + rtnl_net_unlock(net); +out: return err; } @@ -723,16 +720,19 @@ static void check_lifetime(struct work_struct *work) unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; struct hlist_node *n; + struct net *net; int i; + net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); for (i = 0; i < IN4_ADDR_HSIZE; i++) { + struct hlist_head *head = &net->ipv4.inet_addr_lst[i]; bool change_needed = false; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { + hlist_for_each_entry_rcu(ifa, head, addr_lst) { unsigned long age, tstamp; u32 preferred_lft; u32 valid_lft; @@ -769,8 +769,9 @@ static void check_lifetime(struct work_struct *work) rcu_read_unlock(); if (!change_needed) continue; - rtnl_lock(); - hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + + rtnl_net_lock(net); + hlist_for_each_entry_safe(ifa, n, head, addr_lst) { unsigned long age; if (ifa->ifa_flags & IFA_F_PERMANENT) @@ -786,7 +787,7 @@ static void check_lifetime(struct work_struct 
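/* With inet_addr_lst moved into struct net above, lookups no longer need
 * the net_eq() filter, while __ipv4_addr_hash() still mixes the netns
 * hash so bucket spread differs between namespaces. Bucket-selection
 * sketch mirroring inet_addr_hash() (the function name is illustrative):
 */
static u32 sample_inet_addr_bucket(const struct net *net, __be32 addr)
{
	return hash_32(__ipv4_addr_hash(addr, net_hash_mix(net)),
		       IN4_ADDR_HSIZE_SHIFT);
}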
*work) struct in_ifaddr *tmp; ifap = &ifa->ifa_dev->ifa_list; - tmp = rtnl_dereference(*ifap); + tmp = rtnl_net_dereference(net, *ifap); while (tmp) { if (tmp == ifa) { inet_del_ifa(ifa->ifa_dev, @@ -794,7 +795,7 @@ static void check_lifetime(struct work_struct *work) break; } ifap = &tmp->ifa_next; - tmp = rtnl_dereference(*ifap); + tmp = rtnl_net_dereference(net, *ifap); } } else if (ifa->ifa_preferred_lft != INFINITY_LIFE_TIME && @@ -804,7 +805,7 @@ static void check_lifetime(struct work_struct *work) rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } - rtnl_unlock(); + rtnl_net_unlock(net); } next_sec = round_jiffies_up(next); @@ -819,8 +820,8 @@ static void check_lifetime(struct work_struct *work) if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, - next_sched - now); + queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, + next_sched - now); } static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, @@ -849,35 +850,54 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, - __u32 *pvalid_lft, __u32 *pprefered_lft, - struct netlink_ext_ack *extack) +static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb, + struct netlink_ext_ack *extack, + __u32 *valid_lft, __u32 *prefered_lft) { - struct nlattr *tb[IFA_MAX+1]; - struct in_ifaddr *ifa; - struct ifaddrmsg *ifm; - struct net_device *dev; - struct in_device *in_dev; + struct ifaddrmsg *ifm = nlmsg_data(nlh); int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) - goto errout; - - ifm = nlmsg_data(nlh); - err = -EINVAL; + return err; if (ifm->ifa_prefixlen > 32) { NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length"); - goto errout; + return -EINVAL; } if (!tb[IFA_LOCAL]) { NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied"); - goto errout; + return -EINVAL; + } + + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); + return -EINVAL; + } + + *valid_lft = ci->ifa_valid; + *prefered_lft = ci->ifa_prefered; } + return 0; +} + +static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm = nlmsg_data(nlh); + struct in_device *in_dev; + struct net_device *dev; + struct in_ifaddr *ifa; + int err; + dev = __dev_get_by_index(net, ifm->ifa_index); err = -ENODEV; if (!dev) { @@ -885,7 +905,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, goto errout; } - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); err = -ENOBUFS; if (!in_dev) goto errout; @@ -906,8 +926,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); - ifa->ifa_flags = tb[IFA_FLAGS] ? 
nla_get_u32(tb[IFA_FLAGS]) : - ifm->ifa_flags; + ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags); ifa->ifa_scope = ifm->ifa_scope; ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); @@ -926,76 +945,69 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, if (tb[IFA_PROTO]) ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]); - if (tb[IFA_CACHEINFO]) { - struct ifa_cacheinfo *ci; - - ci = nla_data(tb[IFA_CACHEINFO]); - if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { - NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); - err = -EINVAL; - goto errout_free; - } - *pvalid_lft = ci->ifa_valid; - *pprefered_lft = ci->ifa_prefered; - } - return ifa; -errout_free: - inet_free_ifa(ifa); errout: return ERR_PTR(err); } -static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1; - if (!ifa->ifa_local) - return NULL; - - in_dev_for_each_ifa_rtnl(ifa1, in_dev) { + in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa) && ifa1->ifa_local == ifa->ifa_local) return ifa1; } + return NULL; } static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + __u32 prefered_lft = INFINITY_LIFE_TIME; + __u32 valid_lft = INFINITY_LIFE_TIME; struct net *net = sock_net(skb->sk); - struct in_ifaddr *ifa; struct in_ifaddr *ifa_existing; - __u32 valid_lft = INFINITY_LIFE_TIME; - __u32 prefered_lft = INFINITY_LIFE_TIME; + struct nlattr *tb[IFA_MAX + 1]; + struct in_ifaddr *ifa; + int ret; - ASSERT_RTNL(); + ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft); + if (ret < 0) + return ret; + + if (!nla_get_in_addr(tb[IFA_LOCAL])) + return 0; - ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack); - if (IS_ERR(ifa)) - return PTR_ERR(ifa); + rtnl_net_lock(net); - ifa_existing = find_matching_ifa(ifa); + ifa = inet_rtm_to_ifa(net, nlh, tb, extack); + if (IS_ERR(ifa)) { + ret = PTR_ERR(ifa); + goto unlock; + } + + ifa_existing = find_matching_ifa(net, ifa); if (!ifa_existing) { /* It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. 
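The nla_get_u32_default() conversion above, like the nla_get_in_addr_default() ones later in this series, replaces a recurring ternary with a helper whose semantics are simply "attribute present ? value : default". A minimal sketch of the idea (illustrative only, not the actual include/net/netlink.h implementation):

        static inline u32 nla_get_u32_default(const struct nlattr *nla, u32 defvalue)
        {
                /* attribute present ? its value : caller's default */
                return nla ? nla_get_u32(nla) : defvalue;
        }

Every doit handler that open-coded "tb[ATTR] ? nla_get_*(tb[ATTR]) : default" can drop the branch at the call site.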
*/ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { - int ret = ip_mc_autojoin_config(net, true, ifa); - + ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed"); inet_free_ifa(ifa); - return ret; + goto unlock; } } - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, - extack); + + ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack); } else { u32 new_metric = ifa->ifa_rt_priority; u8 new_proto = ifa->ifa_proto; @@ -1005,7 +1017,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG(extack, "ipv4: Address already assigned"); - return -EEXIST; + ret = -EEXIST; + goto unlock; } ifa = ifa_existing; @@ -1017,12 +1030,16 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, ifa->ifa_proto = new_proto; set_ifa_lifetime(ifa, valid_lft, prefered_lft); - cancel_delayed_work(&check_lifetime_work); + cancel_delayed_work(&net->ipv4.addr_chk_work); queue_delayed_work(system_power_efficient_wq, - &check_lifetime_work, 0); + &net->ipv4.addr_chk_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); } - return 0; + +unlock: + rtnl_net_unlock(net); + + return ret; } /* @@ -1109,7 +1126,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) goto out; } - rtnl_lock(); + rtnl_net_lock(net); ret = -ENODEV; dev = __dev_get_by_name(net, ifr->ifr_name); @@ -1119,7 +1136,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) if (colon) *colon = ':'; - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); if (in_dev) { if (tryaddrmatch) { /* Matthias Andree */ @@ -1129,7 +1146,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) This is checked above. 
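The rtnl_net_lock(net)/rtnl_net_unlock(net) pairs that replace rtnl_lock()/rtnl_unlock() throughout this file scope the critical section to one netns, and the matching accessors (__in_dev_get_rtnl_net(), rtnl_net_dereference(), ASSERT_RTNL_NET()) let lockdep verify that per-netns data is only touched under the right instance. The shape of a converted handler, as a sketch with a hypothetical function name:

        static int some_ipv4_doit(struct net *net, struct net_device *dev)
        {
                struct in_device *in_dev;

                rtnl_net_lock(net);             /* RTNL scoped to this netns */
                in_dev = __in_dev_get_rtnl_net(dev);
                /* walk/modify in_dev->ifa_list via rtnl_net_dereference(net, ...) */
                rtnl_net_unlock(net);
                return 0;
        }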
*/ for (ifap = &in_dev->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; + (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) { if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == @@ -1143,7 +1160,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) comparing just the label */ if (!ifa) { for (ifap = &in_dev->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; + (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; @@ -1185,6 +1202,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) inet_del_ifa(in_dev, ifap, 1); break; } + + /* NETDEV_UP/DOWN/CHANGE could touch a peer dev */ + ASSERT_RTNL(); ret = dev_change_flags(dev, ifr->ifr_flags, NULL); break; @@ -1286,14 +1306,14 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) break; } done: - rtnl_unlock(); + rtnl_net_unlock(net); out: return ret; } int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev = __in_dev_get_rtnl_net(dev); const struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -1304,7 +1324,7 @@ int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) if (!in_dev) goto out; - in_dev_for_each_ifa_rtnl(ifa, in_dev) { + in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) { if (!buf) { done += size; continue; @@ -2375,7 +2395,7 @@ static void inet_forward_change(struct net *net) if (on) dev_disable_lro(dev); - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -2466,7 +2486,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write, if (write && *valp != val) { if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { - if (!rtnl_trylock()) { + if (!rtnl_net_trylock(net)) { /* Restore the original values before restarting */ *valp = val; *ppos = pos; @@ -2485,7 +2505,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write, idev->dev->ifindex, cnf); } - rtnl_unlock(); + rtnl_net_unlock(net); rt_cache_flush(net); } else inet_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -2678,14 +2698,21 @@ static struct ctl_table ctl_forward_entry[] = { static __net_init int devinet_init_net(struct net *net) { - int err; - struct ipv4_devconf *all, *dflt; #ifdef CONFIG_SYSCTL - struct ctl_table *tbl; struct ctl_table_header *forw_hdr; + struct ctl_table *tbl; #endif + struct ipv4_devconf *all, *dflt; + int err; + int i; err = -ENOMEM; + net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->ipv4.inet_addr_lst) + goto err_alloc_hash; + all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; @@ -2746,6 +2773,11 @@ static __net_init int devinet_init_net(struct net *net) net->ipv4.forw_hdr = forw_hdr; #endif + for (i = 0; i < IN4_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]); + + INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime); + net->ipv4.devconf_all = all; net->ipv4.devconf_dflt = dflt; return 0; @@ -2763,6 +2795,8 @@ err_alloc_ctl: err_alloc_dflt: kfree(all); err_alloc_all: + kfree(net->ipv4.inet_addr_lst); +err_alloc_hash: return err; } @@ -2770,7 +2804,11 @@ static __net_exit void devinet_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL const 
struct ctl_table *tbl; +#endif + cancel_delayed_work_sync(&net->ipv4.addr_chk_work); + +#ifdef CONFIG_SYSCTL tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, @@ -2781,6 +2819,7 @@ static __net_exit void devinet_exit_net(struct net *net) #endif kfree(net->ipv4.devconf_dflt); kfree(net->ipv4.devconf_all); + kfree(net->ipv4.inet_addr_lst); } static __net_initdata struct pernet_operations devinet_ops = { @@ -2796,25 +2835,25 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = { .set_link_af = inet_set_link_af, }; +static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, + {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, + .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + void __init devinet_init(void) { - int i; - - for (i = 0; i < IN4_ADDR_HSIZE; i++) - INIT_HLIST_HEAD(&inet_addr_lst[i]); - register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); - - rtnl_af_register(&inet_af_ops); + if (rtnl_af_register(&inet_af_ops)) + panic("Unable to register inet_af_ops\n"); - rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); - rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, - RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE); - rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - inet_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); + rtnl_register_many(devinet_rtnl_msg_handlers); } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 80c4ea0e12f4..e0d94270da28 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, if (sp->len == XFRM_MAX_DEPTH) goto out_reset; - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, - (xfrm_address_t *)&ip_hdr(skb)->daddr, - spi, IPPROTO_ESP, AF_INET); + x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ip_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET); if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) { /* non-offload path will record the error and audit log */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 793e6781399a..272e42d81323 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -293,7 +293,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK, + .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb))), .flowi4_scope = scope, .flowi4_mark = vmark ? 
skb->mark : 0, }; @@ -342,10 +342,11 @@ EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev); * called with rcu_read_lock() */ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { struct net *net = dev_net(dev); + enum skb_drop_reason reason; struct flow_keys flkeys; int ret, no_addr; struct fib_result res; @@ -357,7 +358,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; - fl4.flowi4_tos = tos; + fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; @@ -377,9 +378,15 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; - if (res.type != RTN_UNICAST && - (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) - goto e_inval; + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; + goto e_inval; + } else if (!IN_DEV_ACCEPT_LOCAL(idev)) { + reason = SKB_DROP_REASON_IP_LOCAL_SOURCE; + goto e_inval; + } + } fib_combine_itag(itag, &res); dev_match = fib_info_nh_uses_dev(res.fi, dev); @@ -412,14 +419,14 @@ last_resort: return 0; e_inval: - return -EINVAL; + return -reason; e_rpf: - return -EXDEV; + return -SKB_DROP_REASON_IP_RPFILTER; } /* Ignore rp_filter for packets protected by IPsec. */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); @@ -440,7 +447,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, * and the same host but different containers are not. 
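fib_validate_source() and __fib_validate_source() now encode an SKB drop reason in their failure returns: 0 (or a positive hint) on success, the negated enum skb_drop_reason otherwise, so legacy "err < 0" tests keep working while callers that care can recover the precise reason. The decode side, as a sketch:

        err = fib_validate_source(skb, src, dst, dscp, oif, dev, in_dev, &itag);
        if (err < 0) {
                /* e.g. SKB_DROP_REASON_IP_RPFILTER, IP_LOCAL_SOURCE, ... */
                enum skb_drop_reason reason = -err;

                kfree_skb_reason(skb, reason);
        }

The fib_validate_source_reason() callers further down in this patch get the reason without the negation dance.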
*/ if (inet_lookup_ifaddr_rcu(net, src)) - return -EINVAL; + return -SKB_DROP_REASON_IP_LOCAL_SOURCE; ok: *itag = 0; @@ -448,7 +455,8 @@ ok: } full_check: - return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); + return __fib_validate_source(skb, src, dst, dscp, oif, dev, r, idev, + itag); } static inline __be32 sk_extract_addr(struct sockaddr *addr) @@ -1648,6 +1656,15 @@ static struct pernet_operations fib_net_ops = { .exit_batch = fib_net_exit_batch, }; +static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_NEWROUTE, + .doit = inet_rtm_newroute}, + {.protocol = PF_INET, .msgtype = RTM_DELROUTE, + .doit = inet_rtm_delroute}, + {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, +}; + void __init ip_fib_init(void) { fib_trie_init(); @@ -1657,8 +1674,5 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, - RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE); + rtnl_register_many(fib_rtnl_msg_handlers); } diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index 0e23ade74493..b1551c26554b 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -22,15 +22,15 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, ASSERT_RTNL(); info->family = AF_INET; - net->ipv4.fib_seq++; + /* Paired with READ_ONCE() in fib4_seq_read() */ + WRITE_ONCE(net->ipv4.fib_seq, net->ipv4.fib_seq + 1); return call_fib_notifiers(net, event_type, info); } -static unsigned int fib4_seq_read(struct net *net) +static unsigned int fib4_seq_read(const struct net *net) { - ASSERT_RTNL(); - - return net->ipv4.fib_seq + fib4_rules_seq_read(net); + /* Paired with WRITE_ONCE() in call_fib4_notifiers() */ + return READ_ONCE(net->ipv4.fib_seq) + fib4_rules_seq_read(net); } static int fib4_dump(struct net *net, struct notifier_block *nb, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b07292d50ee7..8325224ef072 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -74,7 +74,7 @@ int fib4_rules_dump(struct net *net, struct notifier_block *nb, return fib_rules_dump(net, nb, AF_INET, extack); } -unsigned int fib4_rules_seq_read(struct net *net) +unsigned int fib4_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, AF_INET); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ba2df3d2ac15..d2cee5c314f5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -50,17 +50,12 @@ #include "fib_lookup.h" -static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_info_hash_size; static unsigned int fib_info_hash_bits; static unsigned int fib_info_cnt; -#define DEVINDEX_HASHBITS 8 -#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) -static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; - /* for_nexthops and change_nexthops only used when nexthop object * is not set in a fib_info. The logic within can reference fib_nh. 
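call_fib4_notifiers() still runs with RTNL held, so writers to net->ipv4.fib_seq remain serialized; the WRITE_ONCE()/READ_ONCE() pair exists so that fib4_seq_read(), which loses its ASSERT_RTNL() here, can read the counter locklessly without a data race. The pairing, side by side:

        /* writer, serialized by RTNL */
        WRITE_ONCE(net->ipv4.fib_seq, net->ipv4.fib_seq + 1);

        /* reader, no RTNL: observes either the old or the new value */
        seq = READ_ONCE(net->ipv4.fib_seq) + fib4_rules_seq_read(net);

A stale read is harmless here because the sequence number is only compared for equality around a dump to detect concurrent changes.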
*/ @@ -260,12 +255,11 @@ EXPORT_SYMBOL_GPL(free_fib_info); void fib_release_info(struct fib_info *fi) { - spin_lock_bh(&fib_info_lock); + ASSERT_RTNL(); if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); - /* Paired with READ_ONCE() in fib_create_info(). */ - WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); + fib_info_cnt--; if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); @@ -275,14 +269,13 @@ void fib_release_info(struct fib_info *fi) change_nexthops(fi) { if (!nexthop_nh->fib_nh_dev) continue; - hlist_del(&nexthop_nh->nh_hash); + hlist_del_rcu(&nexthop_nh->nh_hash); } endfor_nexthops(fi) } /* Paired with READ_ONCE() from fib_table_lookup() */ WRITE_ONCE(fi->fib_dead, 1); fib_info_put(fi); } - spin_unlock_bh(&fib_info_lock); } static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) @@ -322,17 +315,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) return 0; } -static inline unsigned int fib_devindex_hashfn(unsigned int val) -{ - return hash_32(val, DEVINDEX_HASHBITS); -} - -static struct hlist_head * -fib_info_devhash_bucket(const struct net_device *dev) +static struct hlist_head *fib_nh_head(struct net_device *dev) { - u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - - return &fib_info_devhash[fib_devindex_hashfn(val)]; + return &dev->fib_nh_head; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -347,11 +332,10 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, return val; } -static unsigned int fib_info_hashfn_result(unsigned int val) +static unsigned int fib_info_hashfn_result(const struct net *net, + unsigned int val) { - unsigned int mask = (fib_info_hash_size - 1); - - return (val ^ (val >> 7) ^ (val >> 12)) & mask; + return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits); } static inline unsigned int fib_info_hashfn(struct fib_info *fi) @@ -363,14 +347,14 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi) fi->fib_priority); if (fi->nh) { - val ^= fib_devindex_hashfn(fi->nh->id); + val ^= fi->nh->id; } else { for_nexthops(fi) { - val ^= fib_devindex_hashfn(nh->fib_nh_oif); + val ^= nh->fib_nh_oif; } endfor_nexthops(fi) } - return fib_info_hashfn_result(val); + return fib_info_hashfn_result(fi->fib_net, val); } /* no metrics, only nexthop id */ @@ -381,11 +365,11 @@ static struct fib_info *fib_find_info_nh(struct net *net, struct fib_info *fi; unsigned int hash; - hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id), + hash = fib_info_hashfn_1(cfg->fc_nh_id, cfg->fc_protocol, cfg->fc_scope, (__force u32)cfg->fc_prefsrc, cfg->fc_priority); - hash = fib_info_hashfn_result(hash); + hash = fib_info_hashfn_result(net, hash); head = &fib_info_hash[hash]; hlist_for_each_entry(fi, head, fib_hash) { @@ -437,28 +421,23 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) } /* Check, that the gateway is already configured. - * Used only by redirect accept routine. 
+ * Used only by redirect accept routine, under rcu_read_lock(); */ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - spin_lock(&fib_info_lock); - - head = fib_info_devhash_bucket(dev); + head = fib_nh_head(dev); - hlist_for_each_entry(nh, head, nh_hash) { - if (nh->fib_nh_dev == dev && - nh->fib_nh_gw4 == gw && + hlist_for_each_entry_rcu(nh, head, nh_hash) { + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (nh->fib_nh_gw4 == gw && !(nh->fib_nh_flags & RTNH_F_DEAD)) { - spin_unlock(&fib_info_lock); return 0; } } - spin_unlock(&fib_info_lock); - return -1; } @@ -1277,7 +1256,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, unsigned int old_size = fib_info_hash_size; unsigned int i; - spin_lock_bh(&fib_info_lock); + ASSERT_RTNL(); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_info_hash_size = new_size; @@ -1314,8 +1293,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, } } - spin_unlock_bh(&fib_info_lock); - kvfree(old_info_hash); kvfree(old_laddrhash); } @@ -1391,6 +1368,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, int nhs = 1; struct net *net = cfg->fc_nlinfo.nl_net; + ASSERT_RTNL(); if (cfg->fc_type > RTN_MAX) goto err_inval; @@ -1433,8 +1411,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, err = -ENOBUFS; - /* Paired with WRITE_ONCE() in fib_release_info() */ - if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { + if (fib_info_cnt >= fib_info_hash_size) { unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; @@ -1593,7 +1570,7 @@ link_it: refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); - spin_lock_bh(&fib_info_lock); + fib_info_cnt++; hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); @@ -1611,11 +1588,10 @@ link_it: if (!nexthop_nh->fib_nh_dev) continue; - head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); - hlist_add_head(&nexthop_nh->nh_hash, head); + head = fib_nh_head(nexthop_nh->fib_nh_dev); + hlist_add_head_rcu(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } - spin_unlock_bh(&fib_info_lock); return fi; err_inval: @@ -1965,12 +1941,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - struct hlist_head *head = fib_info_devhash_bucket(dev); + struct hlist_head *head = fib_nh_head(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { - if (nh->fib_nh_dev == dev) - fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } @@ -1984,7 +1960,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - struct hlist_head *head = fib_info_devhash_bucket(dev); + struct hlist_head *head = fib_nh_head(dev); struct fib_info *prev_fi = NULL; int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; @@ -1998,7 +1974,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) int dead; BUG_ON(!fi->fib_nhs); - if (nh->fib_nh_dev != dev || fi == prev_fi) + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (fi == prev_fi) continue; prev_fi = fi; dead = 0; @@ -2148,7 +2125,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - head = fib_info_devhash_bucket(dev); + head = fib_nh_head(dev); ret = 0; 
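The fib_semantics.c changes above swap the global, fib_info_lock-protected fib_info_devhash for a per-device list (dev->fib_nh_head, reached through the fib_nh_head() accessor): updates stay under RTNL but use hlist_add_head_rcu()/hlist_del_rcu(), so ip_fib_check_default() can walk the list under rcu_read_lock() with no spinlock at all. And since every fib_nh on a device's own list points back to that device by construction, the old "nh->fib_nh_dev == dev" filters degrade into DEBUG_NET_WARN_ON_ONCE() assertions. Reader-side shape, as a sketch:

        hlist_for_each_entry_rcu(nh, fib_nh_head(dev), nh_hash) {
                DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
                if (nh->fib_nh_gw4 == gw && !(nh->fib_nh_flags & RTNH_F_DEAD))
                        return 0;       /* gateway already configured */
        }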
hlist_for_each_entry(nh, head, nh_hash) { @@ -2156,7 +2133,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) int alive; BUG_ON(!fi->fib_nhs); - if (nh->fib_nh_dev != dev || fi == prev_fi) + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (fi == prev_fi) continue; prev_fi = fi; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 09e31757e96c..161f5526b86c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -292,15 +292,9 @@ static const int inflate_threshold = 50; static const int halve_threshold_root = 15; static const int inflate_threshold_root = 30; -static void __alias_free_mem(struct rcu_head *head) -{ - struct fib_alias *fa = container_of(head, struct fib_alias, rcu); - kmem_cache_free(fn_alias_kmem, fa); -} - static inline void alias_free_mem_rcu(struct fib_alias *fa) { - call_rcu(&fa->rcu, __alias_free_mem); + kfree_rcu(fa, rcu); } #define TNODE_VMALLOC_MAX \ diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 98b90107b5ab..3d9614609b2d 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -12,7 +12,7 @@ /* Global operation policy for fou */ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = { - [FOU_ATTR_PORT] = { .type = NLA_U16, }, + [FOU_ATTR_PORT] = { .type = NLA_BE16, }, [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, [FOU_ATTR_TYPE] = { .type = NLA_U8, }, @@ -21,7 +21,7 @@ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = { [FOU_ATTR_LOCAL_V6] = { .len = 16, }, [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, [FOU_ATTR_PEER_V6] = { .len = 16, }, - [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, }, + [FOU_ATTR_PEER_PORT] = { .type = NLA_BE16, }, [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e1384e7331d8..4f088fa1c2f2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -445,7 +445,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.saddr = saddr; fl4.flowi4_mark = mark; fl4.flowi4_uid = sock_net_uid(net, NULL); - fl4.flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb))); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); @@ -478,13 +478,11 @@ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb) return route_lookup_dev; } -static struct rtable *icmp_route_lookup(struct net *net, - struct flowi4 *fl4, +static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, - const struct iphdr *iph, - __be32 saddr, u8 tos, u32 mark, - int type, int code, - struct icmp_bxm *param) + const struct iphdr *iph, __be32 saddr, + dscp_t dscp, u32 mark, int type, + int code, struct icmp_bxm *param) { struct net_device *route_lookup_dev; struct dst_entry *dst, *dst2; @@ -498,7 +496,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->saddr = saddr; fl4->flowi4_mark = mark; fl4->flowi4_uid = sock_net_uid(net, NULL); - fl4->flowi4_tos = tos & INET_DSCP_MASK; + fl4->flowi4_tos = inet_dscp_to_dsfield(dscp); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; @@ -547,7 +545,7 @@ static struct rtable *icmp_route_lookup(struct net *net, orefdst = skb_in->_skb_refdst; /* save old refdst */ skb_dst_set(skb_in, NULL); err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, - tos, rt2->dst.dev); + dscp, rt2->dst.dev) ? 
-EINVAL : 0; dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); @@ -741,8 +739,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, ipc.opt = &icmp_param.replyopts.opt; ipc.sockc.mark = mark; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, - type, code, &icmp_param); + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, + inet_dsfield_to_dscp(tos), mark, type, code, + &icmp_param); if (IS_ERR(rt)) goto out_unlock; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9bf09de6a2e7..6a238398acc9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1437,16 +1437,32 @@ static void ip_mc_hash_remove(struct in_device *in_dev, static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode, gfp_t gfp) { + struct ip_mc_list __rcu **mc_hash; struct ip_mc_list *im; ASSERT_RTNL(); - for_each_pmc_rtnl(in_dev, im) { - if (im->multiaddr == addr) { - im->users++; - ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0); - goto out; + mc_hash = rtnl_dereference(in_dev->mc_hash); + if (mc_hash) { + u32 hash = hash_32((__force u32)addr, MC_HASH_SZ_LOG); + + for (im = rtnl_dereference(mc_hash[hash]); + im; + im = rtnl_dereference(im->next_hash)) { + if (im->multiaddr == addr) + break; } + } else { + for_each_pmc_rtnl(in_dev, im) { + if (im->multiaddr == addr) + break; + } + } + + if (im) { + im->users++; + ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0); + goto out; } im = kzalloc(sizeof(*im), gfp); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2b698f8419fe..491c2c6b683e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -775,7 +775,8 @@ void inet_csk_clear_xmit_timers(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_pending = icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_pending, 0); + smp_store_release(&icsk->icsk_ack.pending, 0); sk_stop_timer(sk, &icsk->icsk_retransmit_timer); sk_stop_timer(sk, &icsk->icsk_delack_timer); @@ -790,7 +791,8 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk) /* ongoing timer handlers need to acquire socket lock. 
*/ sock_not_owned_by_me(sk); - icsk->icsk_pending = icsk->icsk_ack.pending = 0; + smp_store_release(&icsk->icsk_pending, 0); + smp_store_release(&icsk->icsk_ack.pending, 0); sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 67639309163d..321acc8abf17 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -247,6 +247,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; + u8 icsk_pending; int protocol; cb_data = cb->data; @@ -307,14 +308,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto out; } - if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + icsk_pending = smp_load_acquire(&icsk->icsk_pending); + if (icsk_pending == ICSK_TIME_RETRANS || + icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + } else if (icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 5bd759963451..5ab56f4cb529 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -128,11 +128,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, return NULL; } -static void inetpeer_free_rcu(struct rcu_head *head) -{ - kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); -} - /* perform garbage collect on all items stacked during a lookup */ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], @@ -168,7 +163,7 @@ static void inet_peer_gc(struct inet_peer_base *base, if (p) { rb_erase(&p->rb_node, &base->rb_root); base->total--; - call_rcu(&p->rcu, inetpeer_free_rcu); + kfree_rcu(p, rcu); } } } @@ -242,7 +237,7 @@ void inet_putpeer(struct inet_peer *p) WRITE_ONCE(p->dtime, (__u32)jiffies); if (refcount_dec_and_test(&p->refcnt)) - call_rcu(&p->rcu, inetpeer_free_rcu); + kfree_rcu(p, rcu); } EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a92664a5ef2e..07036a2943c1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -132,12 +132,12 @@ static bool frag_expire_skip_icmp(u32 user) */ static void ip_expire(struct timer_list *t) { + enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; struct inet_frag_queue *frag = from_timer(frag, t, timer); const struct iphdr *iph; struct sk_buff *head = NULL; struct net *net; struct ipq *qp; - int err; qp = container_of(frag, struct ipq, q); net = qp->q.fqdir->net; @@ -175,14 +175,15 @@ static void ip_expire(struct timer_list *t) /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev); - if (err) + reason = ip_route_input_noref(head, iph->daddr, iph->saddr, + ip4h_dscp(iph), head->dev); + if (reason) goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. 
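Clearing icsk_pending and icsk_ack.pending with smp_store_release(), paired with the smp_load_acquire() in inet_sk_diag_fill() below, is for the lockless diag reader: the release/acquire pair keeps the flag ordered against the timer fields read after it (icsk_retransmits, icsk_timeout), so an unlocked dump racing timer teardown does not report a half-cleared timer. A compressed view of the pairing:

        /* writer, timer teardown */
        smp_store_release(&icsk->icsk_pending, 0);

        /* reader, inet_diag, no socket lock held */
        u8 icsk_pending = smp_load_acquire(&icsk->icsk_pending);
        if (icsk_pending == ICSK_TIME_RETRANS)
                r->idiag_retrans = icsk->icsk_retransmits;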
*/ + reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; if (frag_expire_skip_icmp(qp->q.key.v4.user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; @@ -195,7 +196,7 @@ out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); - kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); + kfree_skb_reason(head, reason); ipq_put(qp); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b6e7d4921309..f0a4dda246ab 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -322,15 +322,14 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, int err, drop_reason; struct rtable *rt; - drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; - if (ip_can_use_hint(skb, iph, hint)) { - err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, - dev, hint); - if (unlikely(err)) + drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev, hint); + if (unlikely(drop_reason)) goto drop_error; } + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && !skb_dst(skb) && !skb->sk && @@ -362,10 +361,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, * how the packet travels inside Linux networking. */ if (!skb_valid_dst(skb)) { - err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, dev); - if (unlikely(err)) + drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev); + if (unlikely(drop_reason)) goto drop_error; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; } else { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -425,10 +425,8 @@ drop: return NET_RX_DROP; drop_error: - if (err == -EXDEV) { - drop_reason = SKB_DROP_REASON_IP_RPFILTER; + if (drop_reason == SKB_DROP_REASON_IP_RPFILTER) __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); - } goto drop; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 68aedb8877b9..e3321932bec0 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -617,7 +617,8 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); + err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph), + dev) ? -EINVAL : 0; rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 49811c9281d4..0065b1996c94 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -973,7 +973,7 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = dst_rtable(cork->dst); - bool paged, hold_tskey, extra_uref = false; + bool paged, hold_tskey = false, extra_uref = false; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; @@ -1049,10 +1049,15 @@ static int __ip_append_data(struct sock *sk, cork->length += length; - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* So, what's going on in the loop below? 
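In __ip_append_data(), the timestamp key for SOF_TIMESTAMPING_OPT_ID can now be pinned by the sender instead of drawn from the socket's atomic counter: ip_setup_cork() latches a caller-supplied id into the cork when the sockcm cookie carries SOCKCM_FLAG_TS_OPT_ID, and only the legacy path sets hold_tskey, so only the legacy path has a counter reservation to unwind on error. The selection logic, condensed:

        if (cork->flags & IPCORK_TS_OPT_ID)
                tskey = cork->ts_opt_id;                        /* fixed id from cmsg */
        else
                tskey = atomic_inc_return(&sk->sk_tskey) - 1;   /* per-socket counter */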
* @@ -1326,7 +1331,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->priority = ipc->priority; cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; - sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); + sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags); + if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { + cork->flags |= IPCORK_TS_OPT_ID; + cork->ts_opt_id = ipc->sockc.ts_opt_id; + } return 0; } @@ -1587,7 +1596,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. */ -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk, + struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, @@ -1653,6 +1663,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; + if (orig_sk) + skb_set_owner_edemux(nskb, (struct sock *)orig_sk); if (transmit_time) nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 089864c6a35e..c58dd78509a2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -288,7 +288,7 @@ static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack); } -static unsigned int ipmr_rules_seq_read(struct net *net) +static unsigned int ipmr_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); } @@ -346,7 +346,7 @@ static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, return 0; } -static unsigned int ipmr_rules_seq_read(struct net *net) +static unsigned int ipmr_rules_seq_read(const struct net *net) { return 0; } @@ -2081,7 +2081,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = iph->tos & INET_DSCP_MASK, + .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)), .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? @@ -2546,9 +2546,9 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err < 0) goto errout; - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; - grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; - tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; + src = nla_get_in_addr_default(tb[RTA_SRC], 0); + grp = nla_get_in_addr_default(tb[RTA_DST], 0); + tableid = nla_get_u32_default(tb[RTA_TABLE], 0); mrt = ipmr_get_table(net, tableid ? 
tableid : RT_TABLE_DEFAULT); if (!mrt) { @@ -3035,11 +3035,9 @@ static const struct net_protocol pim_protocol = { }; #endif -static unsigned int ipmr_seq_read(struct net *net) +static unsigned int ipmr_seq_read(const struct net *net) { - ASSERT_RTNL(); - - return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); + return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); } static int ipmr_dump(struct net *net, struct notifier_block *nb, @@ -3139,6 +3137,17 @@ static struct pernet_operations ipmr_net_ops = { .exit_batch = ipmr_net_exit_batch, }; +static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, + .dumpit = ipmr_rtm_dumplink}, + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, + .doit = ipmr_rtm_route}, + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, + .doit = ipmr_rtm_route}, + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, + .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute}, +}; + int __init ip_mr_init(void) { int err; @@ -3159,15 +3168,8 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); - rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, - ipmr_rtm_route, NULL, 0); - rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, - ipmr_rtm_route, NULL, 0); - - rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, - NULL, ipmr_rtm_dumplink, 0); + rtnl_register_many(ipmr_rtnl_msg_handlers); + return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index e0aab66cd925..08bc3f2c0078 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -44,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un */ fl4.daddr = iph->daddr; fl4.saddr = saddr; - fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0; fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1b991b889506..ef8009281da5 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -12,7 +12,13 @@ config NF_DEFRAG_IPV4 # old sockopt interface and eval loop config IP_NF_IPTABLES_LEGACY - tristate + tristate "Legacy IP tables support" + default n + select NETFILTER_XTABLES + help + iptables is a legacy packet classifier. + This is not needed if you are using iptables over nftables + (iptables-nft). config NF_SOCKET_IPV4 tristate "IPv4 socket lookup support" @@ -318,7 +324,13 @@ endif # IP_NF_IPTABLES # ARP tables config IP_NF_ARPTABLES - tristate + tristate "Legacy ARPTABLES support" + depends on NETFILTER_XTABLES + default n + help + arptables is a legacy packet classifier. + This is not needed if you are using arptables over nftables + (iptables-nft). config NFT_COMPAT_ARP tristate diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 1ce7a1655b97..a27782d7653e 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? 
skb->mark : 0; - flow.flowi4_tos = iph->tos & INET_DSCP_MASK; + flow.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); flow.flowi4_uid = sock_net_uid(xt_net(par), NULL); diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index ec94ee1051c7..25e1e8eb18dd 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -33,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb, fl4.flowi4_oif = oif; fl4.daddr = gw->s_addr; - fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 09fff5d424ef..625adbc42037 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -11,6 +11,7 @@ #include <net/netfilter/nft_fib.h> #include <net/inet_dscp.h> +#include <net/ip.h> #include <net/ip_fib.h> #include <net/route.h> @@ -107,7 +108,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (priv->flags & NFTA_FIB_F_MARK) fl4.flowi4_mark = pkt->skb->mark; - fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); if (priv->flags & NFTA_FIB_F_DADDR) { fl4.daddr = iph->daddr; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 93aaea0006ba..09a3d73b45ba 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3247,12 +3247,8 @@ static int nh_valid_get_del_req(const struct nlmsghdr *nlh, return -EINVAL; } - if (op_flags) { - if (tb[NHA_OP_FLAGS]) - *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); - else - *op_flags = 0; - } + if (op_flags) + *op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0); return 0; } @@ -3433,10 +3429,7 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, if (err < 0) return err; - if (tb[NHA_OP_FLAGS]) - filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); - else - filter->op_flags = 0; + filter->op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0); return __nh_valid_dump_req(nlh, tb, filter, cb->extack); } @@ -4042,25 +4035,30 @@ static struct pernet_operations nexthop_net_ops = { .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, }; +static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop}, + {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop}, + {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop, + .dumpit = rtm_dump_nexthop}, + {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket, + .dumpit = rtm_dump_nexthop_bucket}, + {.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP, + .doit = rtm_new_nexthop}, + {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP, + .dumpit = rtm_dump_nexthop}, + {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP, + .doit = rtm_new_nexthop}, + {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP, + .dumpit = rtm_dump_nexthop}, +}; + static int __init nexthop_init(void) { register_pernet_subsys(&nexthop_net_ops); register_netdevice_notifier(&nh_netdev_notifier); - rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, - rtm_dump_nexthop, 0); - - rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - 
rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); - - rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); - - rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket, - rtm_dump_nexthop_bucket, 0); + rtnl_register_many(nexthop_rtnl_msg_handlers); return 0; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 474dfd263c8b..0e9e01967ec9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -370,7 +370,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - skb_setup_tx_timestamp(skb, sockc->tsflags); + skb_setup_tx_timestamp(skb, sockc); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 723ac9181558..e5603e84b20d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1027,6 +1027,19 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) struct fib_nh_common *nhc; fib_select_path(net, &res, fl4, NULL); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (fib_info_num_path(res.fi) > 1) { + int nhsel; + + for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) { + nhc = fib_info_nhc(res.fi, nhsel); + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, + jiffies + net->ipv4.ip_rt_mtu_expires); + } + rcu_read_unlock(); + return; + } +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); @@ -1263,7 +1276,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = iph->tos & INET_DSCP_MASK, + .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)), .flowi4_oif = rt->dst.dev->ifindex, .flowi4_iif = skb->dev->ifindex, .flowi4_mark = skb->mark, @@ -1665,49 +1678,54 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) EXPORT_SYMBOL(rt_dst_clone); /* called in rcu_read_lock() section */ -int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, - struct in_device *in_dev, u32 *itag) +enum skb_drop_reason +ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + struct in_device *in_dev, u32 *itag) { - int err; + enum skb_drop_reason reason; /* Primary sanity checks. 
*/ if (!in_dev) - return -EINVAL; + return SKB_DROP_REASON_NOT_SPECIFIED; - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || - skb->protocol != htons(ETH_P_IP)) - return -EINVAL; + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) + return SKB_DROP_REASON_IP_INVALID_SOURCE; + + if (skb->protocol != htons(ETH_P_IP)) + return SKB_DROP_REASON_INVALID_PROTO; if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) - return -EINVAL; + return SKB_DROP_REASON_IP_LOCALNET; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr) && ip_hdr(skb)->protocol != IPPROTO_IGMP) - return -EINVAL; + return SKB_DROP_REASON_IP_INVALID_SOURCE; } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, - in_dev, itag); - if (err < 0) - return err; + reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, + dev, in_dev, itag); + if (reason) + return reason; } - return 0; + return SKB_NOT_DROPPED_YET; } /* called in rcu_read_lock() section */ -static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, int our) +static enum skb_drop_reason +ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, int our) { struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; + enum skb_drop_reason reason; struct rtable *rth; u32 itag = 0; - int err; - err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); - if (err) - return err; + reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev, + &itag); + if (reason) + return reason; if (our) flags |= RTCF_LOCAL; @@ -1718,7 +1736,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, false); if (!rth) - return -ENOBUFS; + return SKB_DROP_REASON_NOMEM; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; @@ -1734,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); - return 0; + return SKB_NOT_DROPPED_YET; } @@ -1764,11 +1782,12 @@ static void ip_handle_martian_source(struct net_device *dev, } /* called in rcu_read_lock() section */ -static int __mkroute_input(struct sk_buff *skb, - const struct fib_result *res, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) +static enum skb_drop_reason +__mkroute_input(struct sk_buff *skb, const struct fib_result *res, + struct in_device *in_dev, __be32 daddr, + __be32 saddr, dscp_t dscp) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; @@ -1782,12 +1801,13 @@ static int __mkroute_input(struct sk_buff *skb, out_dev = __in_dev_get_rcu(dev); if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). 
Please report.\n"); - return -EINVAL; + return reason; } - err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), + err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { + reason = -err; ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -1815,7 +1835,7 @@ static int __mkroute_input(struct sk_buff *skb, */ if (out_dev == in_dev && IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { - err = -EINVAL; + reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE; goto cleanup; } } @@ -1838,7 +1858,7 @@ static int __mkroute_input(struct sk_buff *skb, rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { - err = -ENOBUFS; + reason = SKB_DROP_REASON_NOMEM; goto cleanup; } @@ -1852,9 +1872,9 @@ static int __mkroute_input(struct sk_buff *skb, lwtunnel_set_redirect(&rth->dst); skb_dst_set(skb, &rth->dst); out: - err = 0; - cleanup: - return err; + reason = SKB_NOT_DROPPED_YET; +cleanup: + return reason; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -2112,11 +2132,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -static int ip_mkroute_input(struct sk_buff *skb, - struct fib_result *res, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos, - struct flow_keys *hkeys) +static enum skb_drop_reason +ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, + struct in_device *in_dev, __be32 daddr, + __be32 saddr, dscp_t dscp, struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && fib_info_num_path(res->fi) > 1) { @@ -2128,50 +2147,57 @@ static int ip_mkroute_input(struct sk_buff *skb, #endif /* create a routing cache entry */ - return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); + return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp); } /* Implements all the saddr-related checks as ip_route_input_slow(), * assuming daddr is valid and the destination is not a local broadcast one. * Uses the provided hint instead of performing a route lookup. 
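From __mkroute_input() onward, the IPv4 input-route helpers return enum skb_drop_reason themselves: SKB_NOT_DROPPED_YET (value 0) on success, a concrete reason otherwise, which flows straight into kfree_skb_reason() at the drop sites (martian source/destination, RTN type checks, allocation failures). Callers that still speak errno flatten the reason at the boundary, as the ip_options and ICMP hunks above already do:

        /* illustrative errno boundary around a reason-returning call */
        err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph), dev)
                ? -EINVAL : 0;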
*/ -int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, - const struct sk_buff *hint) +enum skb_drop_reason +ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + const struct sk_buff *hint) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rt = skb_rtable(hint); struct net *net = dev_net(dev); - int err = -EINVAL; u32 tag = 0; if (!in_dev) - return -EINVAL; + return reason; - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } - if (ipv4_is_zeronet(saddr)) + if (ipv4_is_zeronet(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } - if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) + if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { + reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_source; + } if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; - tos &= INET_DSCP_MASK; - err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); - if (err < 0) + reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev, + in_dev, &tag); + if (reason) goto martian_source; skip_validate_source: skb_dst_copy(skb, hint); - return 0; + return SKB_NOT_DROPPED_YET; martian_source: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - return err; + return reason; } /* get device for dst_alloc with local routes */ @@ -2200,10 +2226,12 @@ static struct net_device *ip_rt_get_dev(struct net *net, * called with rcu_read_lock() */ -static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, - struct fib_result *res) +static enum skb_drop_reason +ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + struct fib_result *res) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); struct flow_keys *flkeys = NULL, _flkeys; struct net *net = dev_net(dev); @@ -2231,8 +2259,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_tun_key.tun_id = 0; skb_dst_drop(skb); - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } res->fi = NULL; res->table = NULL; @@ -2242,21 +2272,29 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* Accept zero addresses only to limited broadcast; * I even do not know to fix it or not. 
Waiting for complains :-) */ - if (ipv4_is_zeronet(saddr)) + if (ipv4_is_zeronet(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } - if (ipv4_is_zeronet(daddr)) + if (ipv4_is_zeronet(daddr)) { + reason = SKB_DROP_REASON_IP_INVALID_DEST; goto martian_destination; + } /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), * and call it once if daddr or/and saddr are loopback addresses */ if (ipv4_is_loopback(daddr)) { - if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { + reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_destination; + } } else if (ipv4_is_loopback(saddr)) { - if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { + reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_source; + } } /* @@ -2266,7 +2304,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; - fl4.flowi4_tos = tos; + fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; fl4.daddr = daddr; @@ -2298,10 +2336,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; } + err = -EINVAL; if (res->type == RTN_LOCAL) { - err = fib_validate_source(skb, saddr, daddr, tos, - 0, dev, in_dev, &itag); - if (err < 0) + reason = fib_validate_source_reason(skb, saddr, daddr, dscp, + 0, dev, in_dev, &itag); + if (reason) goto martian_source; goto local_input; } @@ -2310,21 +2349,28 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = -EHOSTUNREACH; goto no_route; } - if (res->type != RTN_UNICAST) + if (res->type != RTN_UNICAST) { + reason = SKB_DROP_REASON_IP_INVALID_DEST; goto martian_destination; + } make_route: - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); -out: return err; + reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, + flkeys); + +out: + return reason; brd_input: - if (skb->protocol != htons(ETH_P_IP)) - goto e_inval; + if (skb->protocol != htons(ETH_P_IP)) { + reason = SKB_DROP_REASON_INVALID_PROTO; + goto out; + } if (!ipv4_is_zeronet(saddr)) { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, - in_dev, &itag); - if (err < 0) + reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, + dev, in_dev, &itag); + if (reason) goto martian_source; } flags |= RTCF_BROADCAST; @@ -2342,7 +2388,7 @@ local_input: rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); - err = 0; + reason = SKB_NOT_DROPPED_YET; goto out; } } @@ -2379,7 +2425,7 @@ local_input: rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); - err = 0; + reason = SKB_NOT_DROPPED_YET; goto out; no_route: @@ -2400,12 +2446,8 @@ martian_destination: &daddr, &saddr, dev->name); #endif -e_inval: - err = -EINVAL; - goto out; - e_nobufs: - err = -ENOBUFS; + reason = SKB_DROP_REASON_NOMEM; goto out; martian_source: @@ -2414,8 +2456,10 @@ martian_source: } /* called with rcu_read_lock held */ -static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, struct fib_result *res) +static enum skb_drop_reason +ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + struct fib_result *res) { /* Multicast recognition logic is moved from route cache to here. 
* The problem was that too many Ethernet cards have broken/missing @@ -2429,12 +2473,12 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, * route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); int our = 0; - int err = -EINVAL; if (!in_dev) - return err; + return -EINVAL; our = ip_check_mc_rcu(in_dev, daddr, saddr, ip_hdr(skb)->protocol); @@ -2455,27 +2499,27 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, IN_DEV_MFORWARD(in_dev)) #endif ) { - err = ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); + reason = ip_route_input_mc(skb, daddr, saddr, dscp, + dev, our); } - return err; + return reason; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); + return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res); } -int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) +enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, + __be32 saddr, dscp_t dscp, + struct net_device *dev) { + enum skb_drop_reason reason; struct fib_result res; - int err; - tos &= INET_DSCP_MASK; rcu_read_lock(); - err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); + reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res); rcu_read_unlock(); - return err; + return reason; } EXPORT_SYMBOL(ip_route_input_noref); @@ -3233,10 +3277,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, return err; rtm = nlmsg_data(nlh); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; - dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; - iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; - mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + src = nla_get_in_addr_default(tb[RTA_SRC], 0); + dst = nla_get_in_addr_default(tb[RTA_DST], 0); + iif = nla_get_u32_default(tb[RTA_IIF], 0); + mark = nla_get_u32_default(tb[RTA_MARK], 0); if (tb[RTA_UID]) uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); else @@ -3262,7 +3306,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK; - fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; + fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0); fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; if (sport) @@ -3286,8 +3330,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, skb->dev = dev; skb->mark = mark; err = ip_route_input_rcu(skb, dst, src, - rtm->rtm_tos & INET_DSCP_MASK, dev, - &res); + inet_dsfield_to_dscp(rtm->rtm_tos), + dev, &res) ? 
-EINVAL : 0; rt = skb_rtable(skb); if (err == 0 && rt->dst.error) @@ -3634,6 +3678,11 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ +static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_GETROUTE, + .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, +}; + int __init ip_rt_init(void) { void *idents_hash; @@ -3691,8 +3740,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, - RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register_many(ip_rt_rtnl_msg_handlers); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4f77bd862e95..0d704bda6c41 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -477,15 +477,16 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) +static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc) { struct sk_buff *skb = tcp_write_queue_tail(sk); + u32 tsflags = sockc->tsflags; if (tsflags && skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); + sock_tx_timestamp(sk, sockc, &shinfo->tx_flags); if (tsflags & SOF_TIMESTAMPING_TX_ACK) tcb->txstamp_ack = 1; if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) @@ -1321,7 +1322,7 @@ wait_for_space: out: if (copied) { - tcp_tx_timestamp(sk, sockc.tsflags); + tcp_tx_timestamp(sk, &sockc); tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: @@ -3335,7 +3336,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->window_clamp = 0; tp->delivered = 0; tp->delivered_ce = 0; - if (icsk->icsk_ca_ops->release) + if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release) icsk->icsk_ca_ops->release(sk); memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); icsk->icsk_ca_initialized = 0; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index db6516092daf..bbb8d5f0eae7 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -109,12 +109,13 @@ bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code) * it's known that the keys in ao_info are matching peer's * family/address/VRF/etc. 
*/ -struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, +struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, + struct tcp_ao_info *ao, int sndid, int rcvid) { struct tcp_ao_key *key; - hlist_for_each_entry_rcu(key, &ao->head, node) { + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { if ((sndid >= 0 && key->sndid != sndid) || (rcvid >= 0 && key->rcvid != rcvid)) continue; @@ -205,7 +206,7 @@ static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index, if (!ao) return NULL; - hlist_for_each_entry_rcu(key, &ao->head, node) { + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { u8 prefixlen = min(prefix, key->prefixlen); if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen, @@ -793,7 +794,7 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, if (!ao_info) return -ENOENT; - *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + *key = tcp_ao_established_key(sk, ao_info, aoh->rnext_keyid, -1); if (!*key) return -ENOENT; *traffic_key = snd_other_key(*key); @@ -979,7 +980,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, */ key = READ_ONCE(info->rnext_key); if (key->rcvid != aoh->keyid) { - key = tcp_ao_established_key(info, -1, aoh->keyid); + key = tcp_ao_established_key(sk, info, -1, aoh->keyid); if (!key) goto key_not_found; } @@ -1003,7 +1004,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, aoh->rnext_keyid, tcp_ao_hdr_maclen(aoh)); /* If the key is not found we do nothing. */ - key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); + key = tcp_ao_established_key(sk, info, aoh->rnext_keyid, -1); if (key) /* pairs with tcp_ao_del_cmd */ WRITE_ONCE(info->current_key, key); @@ -1163,7 +1164,7 @@ void tcp_ao_established(struct sock *sk) if (!ao) return; - hlist_for_each_entry_rcu(key, &ao->head, node) + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) tcp_ao_cache_traffic_keys(sk, ao, key); } @@ -1180,7 +1181,7 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq); ao->rcv_sne = 0; - hlist_for_each_entry_rcu(key, &ao->head, node) + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) tcp_ao_cache_traffic_keys(sk, ao, key); } @@ -1256,14 +1257,14 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, key_head = rcu_dereference(hlist_first_rcu(&new_ao->head)); first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node); - key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1); + key = tcp_ao_established_key(req_to_sk(req), new_ao, tcp_rsk(req)->ao_keyid, -1); if (key) new_ao->current_key = key; else new_ao->current_key = first_key; /* set rnext_key */ - key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next); + key = tcp_ao_established_key(req_to_sk(req), new_ao, -1, tcp_rsk(req)->ao_rcv_next); if (key) new_ao->rnext_key = key; else @@ -1857,12 +1858,12 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, * if there's any. 
*/ if (cmd.set_current) { - new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1); if (!new_current) return -ENOENT; } if (cmd.set_rnext) { - new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext); if (!new_rnext) return -ENOENT; } @@ -1902,7 +1903,8 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, * "It is presumed that an MKT affecting a particular * connection cannot be destroyed during an active connection" */ - hlist_for_each_entry_rcu(key, &ao_info->head, node) { + hlist_for_each_entry_rcu(key, &ao_info->head, node, + lockdep_sock_is_held(sk)) { if (cmd.sndid != key->sndid || cmd.rcvid != key->rcvid) continue; @@ -2000,14 +2002,14 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, * if there's any. */ if (cmd.set_current) { - new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1); if (!new_current) { err = -ENOENT; goto out; } } if (cmd.set_rnext) { - new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext); if (!new_rnext) { err = -ENOENT; goto out; @@ -2101,7 +2103,8 @@ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) * The layout of the fields in the user and kernel structures is expected to * be the same (including in the 32bit vs 64bit case). */ -static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, +static int tcp_ao_copy_mkts_to_user(const struct sock *sk, + struct tcp_ao_info *ao_info, sockptr_t optval, sockptr_t optlen) { struct tcp_ao_getsockopt opt_in, opt_out; @@ -2229,7 +2232,8 @@ static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, /* May change in RX, while we're dumping, pre-fetch it */ current_key = READ_ONCE(ao_info->current_key); - hlist_for_each_entry_rcu(key, &ao_info->head, node) { + hlist_for_each_entry_rcu(key, &ao_info->head, node, + lockdep_sock_is_held(sk)) { if (opt_in.get_all) goto match; @@ -2309,7 +2313,7 @@ int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) if (!ao_info) return -ENOENT; - return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen); + return tcp_ao_copy_mkts_to_user(sk, ao_info, optval, optlen); } int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) @@ -2396,7 +2400,7 @@ int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen) WRITE_ONCE(ao->snd_sne, cmd.snd_sne); WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne); - hlist_for_each_entry_rcu(key, &ao->head, node) + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) tcp_ao_cache_traffic_keys(sk, ao, key); return 0; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 0306d257fa64..df758adbb445 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -270,8 +270,9 @@ void tcp_cleanup_congestion_control(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_ca_ops->release) + if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release) icsk->icsk_ca_ops->release(sk); + icsk->icsk_ca_initialized = 0; bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d844e1f867f..5bdf13ac26ef 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4921,8 +4921,8 @@ static bool tcp_ooo_try_coalesce(struct sock 
*sk, return res; } -static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason reason) +noinline_for_tracing static void +tcp_drop_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { sk_drops_add(sk, skb); sk_skb_reason_drop(sk, skb, reason); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5afe5e57c89b..a38c8b1f44db 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -907,7 +907,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, ctl_sk->sk_mark = 0; ctl_sk->sk_priority = 0; } - ip_send_unicast_reply(ctl_sk, + ip_send_unicast_reply(ctl_sk, sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len, @@ -1021,7 +1021,7 @@ static void tcp_v4_send_ack(const struct sock *sk, ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); - ip_send_unicast_reply(ctl_sk, + ip_send_unicast_reply(ctl_sk, sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len, @@ -1053,7 +1053,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) } if (aoh) - key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + key.ao_key = tcp_ao_established_key(sk, ao_info, + aoh->rnext_keyid, -1); } } if (key.ao_key) { @@ -2900,15 +2901,17 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); __u16 srcp = ntohs(inet->inet_sport); + u8 icsk_pending; int rx_queue; int state; - if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + icsk_pending = smp_load_acquire(&icsk->icsk_pending); + if (icsk_pending == ICSK_TIME_RETRANS || + icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sk->sk_timer)) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 68804fd01daf..5485a70b5fe5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2954,10 +2954,8 @@ void tcp_send_loss_probe(struct sock *sk) } skb = skb_rb_last(&sk->tcp_rtx_queue); if (unlikely(!skb)) { - WARN_ONCE(tp->packets_out, - "invalid inflight: %u state %u cwnd %u mss %d\n", - tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss); - inet_csk(sk)->icsk_pending = 0; + tcp_warn_once(sk, tp->packets_out, "invalid inflight: "); + smp_store_release(&inet_csk(sk)->icsk_pending, 0); return; } @@ -2990,7 +2988,7 @@ probe_sent: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. 
tcp_rearm_rto will restart timer from now */ - inet_csk(sk)->icsk_pending = 0; + smp_store_release(&inet_csk(sk)->icsk_pending, 0); rearm_timer: tcp_rearm_rto(sk); } @@ -3728,7 +3726,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, switch (synack_type) { case TCP_SYNACK_NORMAL: - skb_set_owner_w(skb, req_to_sk(req)); + skb_set_owner_edemux(skb, req_to_sk(req)); break; case TCP_SYNACK_COOKIE: /* Under synflood, we do not attach skb to a socket, @@ -4131,7 +4129,10 @@ int tcp_connect(struct sock *sk) if (unlikely(!buff)) return -ENOBUFS; - tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); + /* SYN eats a sequence byte, write_seq updated by + * tcp_connect_queue_skb(). + */ + tcp_init_nondata_skb(buff, tp->write_seq, TCPHDR_SYN); tcp_mstamp_refresh(tp); tp->retrans_stamp = tcp_time_stamp_ts(tp); tcp_connect_queue_skb(sk, buff); @@ -4221,7 +4222,8 @@ void tcp_send_delayed_ack(struct sock *sk) if (!time_before(timeout, icsk->icsk_ack.timeout)) timeout = icsk->icsk_ack.timeout; } - icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + smp_store_release(&icsk->icsk_ack.pending, + icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER); icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 79064580c8c0..b412ed88ccd9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -361,6 +361,14 @@ static void tcp_delack_timer(struct timer_list *t) from_timer(icsk, t, icsk_delack_timer); struct sock *sk = &icsk->icsk_inet.sk; + /* Avoid taking socket spinlock if there is no ACK to send. + * The compressed_ack check is racy, but a separate hrtimer + * will take care of it eventually. + */ + if (!(smp_load_acquire(&icsk->icsk_ack.pending) & ICSK_ACK_TIMER) && + !READ_ONCE(tcp_sk(sk)->compressed_ack)) + goto out; + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_delack_timer_handler(sk); @@ -371,6 +379,7 @@ static void tcp_delack_timer(struct timer_list *t) sock_hold(sk); } bh_unlock_sock(sk); +out: sock_put(sk); } @@ -701,11 +710,11 @@ void tcp_write_timer_handler(struct sock *sk) tcp_send_loss_probe(sk); break; case ICSK_TIME_RETRANS: - icsk->icsk_pending = 0; + smp_store_release(&icsk->icsk_pending, 0); tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: - icsk->icsk_pending = 0; + smp_store_release(&icsk->icsk_pending, 0); tcp_probe_timer(sk); break; } @@ -717,6 +726,10 @@ static void tcp_write_timer(struct timer_list *t) from_timer(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; + /* Avoid locking the socket when there is no pending event. */ + if (!smp_load_acquire(&icsk->icsk_pending)) + goto out; + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_write_timer_handler(sk); @@ -726,6 +739,7 @@ static void tcp_write_timer(struct timer_list *t) sock_hold(sk); } bh_unlock_sock(sk); +out: sock_put(sk); } @@ -851,6 +865,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) * LINUX_MIB_TCPACKCOMPRESSED accurate. 
*/ tp->compressed_ack--; + tcp_mstamp_refresh(tp); tcp_send_ack(sk); } } else { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2849b273b131..6a01905d379f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -100,6 +100,7 @@ #include <net/net_namespace.h> #include <net/icmp.h> #include <net/inet_hashtables.h> +#include <net/ip.h> #include <net/ip_tunnels.h> #include <net/route.h> #include <net/checksum.h> @@ -115,7 +116,6 @@ #include <net/addrconf.h> #include <net/udp_tunnel.h> #include <net/gro.h> -#include <net/inet_dscp.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6_stubs.h> #endif @@ -410,7 +410,6 @@ static int compute_score(struct sock *sk, const struct net *net, return score; } -INDIRECT_CALLABLE_SCOPE u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { @@ -419,6 +418,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, return __inet_ehashfn(laddr, lport, faddr, fport, udp_ehash_secret + net_hash_mix(net)); } +EXPORT_SYMBOL(udp_ehashfn); /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(const struct net *net, @@ -478,6 +478,159 @@ rescore: return result; } +#if IS_ENABLED(CONFIG_BASE_SMALL) +static struct sock *udp4_lib_lookup4(const struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned int hnum, + int dif, int sdif, + struct udp_table *udptable) +{ + return NULL; +} + +static void udp_rehash4(struct udp_table *udptable, struct sock *sk, + u16 newhash4) +{ +} + +static void udp_unhash4(struct udp_table *udptable, struct sock *sk) +{ +} +#else /* !CONFIG_BASE_SMALL */ +static struct sock *udp4_lib_lookup4(const struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned int hnum, + int dif, int sdif, + struct udp_table *udptable) +{ + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); + const struct hlist_nulls_node *node; + struct udp_hslot *hslot4; + unsigned int hash4, slot; + struct udp_sock *up; + struct sock *sk; + + hash4 = udp_ehashfn(net, daddr, hnum, saddr, sport); + slot = hash4 & udptable->mask; + hslot4 = &udptable->hash4[slot]; + INET_ADDR_COOKIE(acookie, saddr, daddr); + +begin: + /* SLAB_TYPESAFE_BY_RCU not used, so we don't need to touch sk_refcnt */ + udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) { + sk = (struct sock *)up; + if (inet_match(net, sk, acookie, ports, dif, sdif)) + return sk; + } + + /* if the nulls value we got at the end of this lookup is not the + * expected one, we must restart lookup. We probably met an item that + * was moved to another chain due to rehash. + */ + if (get_nulls_value(node) != slot) + goto begin; + + return NULL; +} + +/* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. 
*/ +static void udp_rehash4(struct udp_table *udptable, struct sock *sk, + u16 newhash4) +{ + struct udp_hslot *hslot4, *nhslot4; + + hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash); + nhslot4 = udp_hashslot4(udptable, newhash4); + udp_sk(sk)->udp_lrpa_hash = newhash4; + + if (hslot4 != nhslot4) { + spin_lock_bh(&hslot4->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node); + hslot4->count--; + spin_unlock_bh(&hslot4->lock); + + spin_lock_bh(&nhslot4->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, + &nhslot4->nulls_head); + nhslot4->count++; + spin_unlock_bh(&nhslot4->lock); + } +} + +static void udp_unhash4(struct udp_table *udptable, struct sock *sk) +{ + struct udp_hslot *hslot2, *hslot4; + + if (udp_hashed4(sk)) { + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash); + + spin_lock(&hslot4->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node); + hslot4->count--; + spin_unlock(&hslot4->lock); + + spin_lock(&hslot2->lock); + udp_hash4_dec(hslot2); + spin_unlock(&hslot2->lock); + } +} + +void udp_lib_hash4(struct sock *sk, u16 hash) +{ + struct udp_hslot *hslot, *hslot2, *hslot4; + struct net *net = sock_net(sk); + struct udp_table *udptable; + + /* Connected udp socket can re-connect to another remote address, + * so rehash4 is needed. + */ + udptable = net->ipv4.udp_table; + if (udp_hashed4(sk)) { + udp_rehash4(udptable, sk, hash); + return; + } + + hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + hslot4 = udp_hashslot4(udptable, hash); + udp_sk(sk)->udp_lrpa_hash = hash; + + spin_lock_bh(&hslot->lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); + + spin_lock(&hslot4->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, + &hslot4->nulls_head); + hslot4->count++; + spin_unlock(&hslot4->lock); + + spin_lock(&hslot2->lock); + udp_hash4_inc(hslot2); + spin_unlock(&hslot2->lock); + + spin_unlock_bh(&hslot->lock); +} +EXPORT_SYMBOL(udp_lib_hash4); + +/* call with sock lock */ +void udp4_hash4(struct sock *sk) +{ + struct net *net = sock_net(sk); + unsigned int hash; + + if (sk_unhashed(sk) || sk->sk_rcv_saddr == htonl(INADDR_ANY)) + return; + + hash = udp_ehashfn(net, sk->sk_rcv_saddr, sk->sk_num, + sk->sk_daddr, sk->sk_dport); + + udp_lib_hash4(sk, hash); +} +EXPORT_SYMBOL(udp4_hash4); +#endif /* CONFIG_BASE_SMALL */ + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. 
-DaveM */ @@ -486,13 +639,19 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, int sdif, struct udp_table *udptable, struct sk_buff *skb) { unsigned short hnum = ntohs(dport); - unsigned int hash2, slot2; struct udp_hslot *hslot2; struct sock *result, *sk; + unsigned int hash2; hash2 = ipv4_portaddr_hash(net, daddr, hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; + hslot2 = udp_hashslot2(udptable, hash2); + + if (udp_has_hash4(hslot2)) { + result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum, + dif, sdif, udptable); + if (result) /* udp4_lib_lookup4 return sk or NULL */ + return result; + } /* Lookup connected or non-wildcard socket */ result = udp4_lib_lookup2(net, saddr, sport, @@ -519,8 +678,7 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, /* Lookup wildcard sockets */ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; + hslot2 = udp_hashslot2(udptable, hash2); result = udp4_lib_lookup2(net, saddr, sport, htonl(INADDR_ANY), hnum, dif, sdif, @@ -1935,6 +2093,18 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } EXPORT_SYMBOL(udp_pre_connect); +static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip4_datagram_connect(sk, uaddr, addr_len); + if (!res) + udp4_hash4(sk); + release_sock(sk); + return res; +} + int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -1994,6 +2164,8 @@ void udp_lib_unhash(struct sock *sk) hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); hslot2->count--; spin_unlock(&hslot2->lock); + + udp_unhash4(udptable, sk); } spin_unlock_bh(&hslot->lock); } @@ -2003,7 +2175,7 @@ EXPORT_SYMBOL(udp_lib_unhash); /* * inet_rcv_saddr was changed, we must rehash secondary hash */ -void udp_lib_rehash(struct sock *sk, u16 newhash) +void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) { if (sk_hashed(sk)) { struct udp_table *udptable = udp_get_table_prot(sk); @@ -2035,6 +2207,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) spin_unlock(&nhslot2->lock); } + if (udp_hashed4(sk)) { + udp_rehash4(udptable, sk, newhash4); + + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + udp_hash4_dec(hslot2); + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + udp_hash4_inc(nhslot2); + spin_unlock(&nhslot2->lock); + } + } spin_unlock_bh(&hslot->lock); } } @@ -2046,7 +2231,11 @@ void udp_v4_rehash(struct sock *sk) u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); - udp_lib_rehash(sk, new_hash); + u16 new_hash4 = udp_ehashfn(sock_net(sk), + sk->sk_rcv_saddr, sk->sk_num, + sk->sk_daddr, sk->sk_dport); + + udp_lib_rehash(sk, new_hash, new_hash4); } static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -2268,7 +2457,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, udptable->mask; hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udptable->hash2[hash2]; + hslot = &udptable->hash2[hash2].hslot; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } @@ -2539,14 +2728,13 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, struct udp_table *udptable = net->ipv4.udp_table; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); unsigned short hnum = ntohs(loc_port); - unsigned int hash2, slot2; struct udp_hslot *hslot2; + unsigned int hash2; 
__portpair ports; struct sock *sk; hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; + hslot2 = udp_hashslot2(udptable, hash2); ports = INET_COMBINED_PORTS(rmt_port, hnum); udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { @@ -2621,7 +2809,7 @@ int udp_v4_early_demux(struct sk_buff *skb) if (!inet_sk(sk)->inet_daddr && in_dev) return ip_mc_validate_source(skb, iph->daddr, iph->saddr, - iph->tos & INET_DSCP_MASK, + ip4h_dscp(iph), skb->dev, in_dev, &itag); } return 0; @@ -2940,7 +3128,7 @@ struct proto udp_prot = { .owner = THIS_MODULE, .close = udp_lib_close, .pre_connect = udp_pre_connect, - .connect = ip4_datagram_connect, + .connect = udp_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udp_init_sock, @@ -3187,7 +3375,7 @@ again: batch_sks = 0; for (; state->bucket <= udptable->mask; state->bucket++) { - struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; + struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot; if (hlist_empty(&hslot2->head)) continue; @@ -3428,10 +3616,12 @@ __setup("uhash_entries=", set_uhash_entries); void __init udp_table_init(struct udp_table *table, const char *name) { - unsigned int i; + unsigned int i, slot_size; + slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) + + udp_hash4_slot_size(); table->hash = alloc_large_system_hash(name, - 2 * sizeof(struct udp_hslot), + slot_size, uhash_entries, 21, /* one slot per 2 MB */ 0, @@ -3440,17 +3630,18 @@ void __init udp_table_init(struct udp_table *table, const char *name) UDP_HTABLE_SIZE_MIN, UDP_HTABLE_SIZE_MAX); - table->hash2 = table->hash + (table->mask + 1); + table->hash2 = (void *)(table->hash + (table->mask + 1)); for (i = 0; i <= table->mask; i++) { INIT_HLIST_HEAD(&table->hash[i].head); table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } for (i = 0; i <= table->mask; i++) { - INIT_HLIST_HEAD(&table->hash2[i].head); - table->hash2[i].count = 0; - spin_lock_init(&table->hash2[i].lock); + INIT_HLIST_HEAD(&table->hash2[i].hslot.head); + table->hash2[i].hslot.count = 0; + spin_lock_init(&table->hash2[i].hslot.lock); } + udp_table_hash4_init(table); } u32 udp_flow_hashrnd(void) @@ -3476,18 +3667,21 @@ static void __net_init udp_sysctl_init(struct net *net) static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries) { struct udp_table *udptable; + unsigned int slot_size; int i; udptable = kmalloc(sizeof(*udptable), GFP_KERNEL); if (!udptable) goto out; - udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot), + slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) + + udp_hash4_slot_size(); + udptable->hash = vmalloc_huge(hash_entries * slot_size, GFP_KERNEL_ACCOUNT); if (!udptable->hash) goto free_table; - udptable->hash2 = udptable->hash + hash_entries; + udptable->hash2 = (void *)(udptable->hash + hash_entries); udptable->mask = hash_entries - 1; udptable->log = ilog2(hash_entries); @@ -3496,10 +3690,11 @@ static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_ent udptable->hash[i].count = 0; spin_lock_init(&udptable->hash[i].lock); - INIT_HLIST_HEAD(&udptable->hash2[i].head); - udptable->hash2[i].count = 0; - spin_lock_init(&udptable->hash2[i].lock); + INIT_HLIST_HEAD(&udptable->hash2[i].hslot.head); + udptable->hash2[i].hslot.count = 0; + spin_lock_init(&udptable->hash2[i].hslot.lock); } + udp_table_hash4_init(udptable); return udptable; diff --git a/net/ipv4/xfrm4_input.c 
b/net/ipv4/xfrm4_input.c index a620618cc568..b5b06323cfd9 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -33,7 +33,7 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + ip4h_dscp(iph), skb->dev)) goto drop; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 7e1c2faed1ff..7fb6205619e7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -14,6 +14,7 @@ #include <linux/inetdevice.h> #include <net/dst.h> #include <net/xfrm.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/l3mdev.h> @@ -24,7 +25,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = params->daddr->a4; - fl4->flowi4_tos = params->tos; + fl4->flowi4_tos = inet_dscp_to_dsfield(params->dscp); fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net, params->oif); fl4->flowi4_mark = params->mark; diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index b146ce88c5d0..4ee624d8e66f 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -76,7 +76,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + ip4h_dscp(iph), skb->dev)) goto drop; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 94dceac52884..96b5b2b0d507 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1016,7 +1016,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr) { - u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net)); return hash_32(val, IN6_ADDR_HSIZE_SHIFT); } @@ -4793,7 +4793,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!pfx) return -EINVAL; - ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags); /* We ignore other flags so far. */ ifa_flags &= IFA_F_MANAGETEMPADDR; @@ -5018,10 +5018,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (tb[IFA_FLAGS]) - cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]); - else - cfg.ifa_flags = ifm->ifa_flags; + cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags); /* We ignore other flags so far. 
*/ cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | @@ -7406,6 +7403,27 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { .set_link_af = inet6_set_link_af, }; +static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK, + .dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR, + .doit = inet6_rtm_newaddr}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR, + .doit = inet6_rtm_deladdr}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR, + .doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETMULTICAST, + .dumpit = inet6_dump_ifmcaddr, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETANYCAST, + .dumpit = inet6_dump_ifacaddr, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETNETCONF, + .doit = inet6_netconf_get_devconf, .dumpit = inet6_netconf_dump_devconf, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + /* * Init / cleanup code */ @@ -7447,44 +7465,14 @@ int __init addrconf_init(void) addrconf_verify(&init_net); - rtnl_af_register(&inet6_ops); + err = rtnl_af_register(&inet6_ops); + if (err) + goto erraf; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK, - NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) + err = rtnl_register_many(addrconf_rtnl_msg_handlers); + if (err) goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR, - inet6_rtm_newaddr, NULL, 0); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR, - inet6_rtm_deladdr, NULL, 0); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR, - inet6_rtm_getaddr, inet6_dump_ifaddr, - RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST, - NULL, inet6_dump_ifmcaddr, - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST, - NULL, inet6_dump_ifacaddr, - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, - inet6_netconf_get_devconf, - inet6_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; err = ipv6_addr_label_rtnl_register(); if (err < 0) goto errout; @@ -7493,6 +7481,7 @@ int __init addrconf_init(void) errout: rtnl_unregister_all(PF_INET6); rtnl_af_unregister(&inet6_ops); +erraf: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: destroy_workqueue(addrconf_wq); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index acd70b5992a7..ab054f329e12 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -634,23 +634,17 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, return err; } +static const struct rtnl_msg_handler ipv6_adddr_label_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDRLABEL, + .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDRLABEL, + .doit 
= ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDRLABEL, + .doit = ip6addrlbl_get, .dumpit = ip6addrlbl_dump, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + int __init ipv6_addr_label_rtnl_register(void) { - int ret; - - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL, - ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - if (ret < 0) - return ret; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL, - ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - if (ret < 0) - return ret; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL, - ip6addrlbl_get, - ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); - return ret; + return rtnl_register_many(ipv6_adddr_label_rtnl_msg_handlers); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ba69b86f1c7d..f60ec8b0f8ea 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -252,31 +252,29 @@ lookup_protocol: */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 0627c4c18d1a..562cace50ca9 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -49,9 +49,10 @@ static DEFINE_SPINLOCK(acaddr_hash_lock); static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); -static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +static u32 inet6_acaddr_hash(const struct net *net, + const struct in6_addr *addr) { - u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net)); return hash_32(val, IN6_ADDR_HSIZE_SHIFT); } diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 919ebfabbe4e..7b41fb4f00b5 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, if (sp->len == XFRM_MAX_DEPTH) goto out_reset; - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, - (xfrm_address_t *)&ipv6_hdr(skb)->daddr, - spi, IPPROTO_ESP, AF_INET6); + x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET6); if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) { /* non-offload path will record the error and audit log */ diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index f87ae33e1d01..949b72610df7 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -22,7 +22,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, return call_fib_notifiers(net, event_type, info); } -static unsigned int fib6_seq_read(struct net *net) +static unsigned int fib6_seq_read(const struct net *net) { return fib6_tables_seq_read(net) + fib6_rules_seq_read(net); } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 
04a9ed5e8310..c85c1627cb16 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -56,7 +56,7 @@ int fib6_rules_dump(struct net *net, struct notifier_block *nb, return fib_rules_dump(net, nb, AF_INET6, extack); } -unsigned int fib6_rules_seq_read(struct net *net) +unsigned int fib6_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, AF_INET6); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 534a4498e280..7646e401c630 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -105,16 +105,11 @@ static int parse_nl_config(struct genl_info *info, xp->ip.locator_match.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR_MATCH]); - if (info->attrs[ILA_ATTR_CSUM_MODE]) - xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); - else - xp->ip.csum_mode = ILA_CSUM_NO_ACTION; - - if (info->attrs[ILA_ATTR_IDENT_TYPE]) - xp->ip.ident_type = nla_get_u8( - info->attrs[ILA_ATTR_IDENT_TYPE]); - else - xp->ip.ident_type = ILA_ATYPE_USE_FORMAT; + xp->ip.csum_mode = nla_get_u8_default(info->attrs[ILA_ATTR_CSUM_MODE], + ILA_CSUM_NO_ACTION); + + xp->ip.ident_type = nla_get_u8_default(info->attrs[ILA_ATTR_IDENT_TYPE], + ILA_ATYPE_USE_FORMAT); if (info->attrs[ILA_ATTR_IFINDEX]) xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 08c929513065..a84d332f952f 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -135,15 +135,11 @@ static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info) ns->id = id; - if (!info->attrs[IOAM6_ATTR_NS_DATA]) - data32 = IOAM6_U32_UNAVAILABLE; - else - data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]); - - if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE]) - data64 = IOAM6_U64_UNAVAILABLE; - else - data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]); + data32 = nla_get_u32_default(info->attrs[IOAM6_ATTR_NS_DATA], + IOAM6_U32_UNAVAILABLE); + + data64 = nla_get_u64_default(info->attrs[IOAM6_ATTR_NS_DATA_WIDE], + IOAM6_U64_UNAVAILABLE); ns->data = cpu_to_be32(data32); ns->data_wide = cpu_to_be64(data64); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index beb6b4cfc551..9d8422e350f8 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -142,10 +142,8 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, } } - if (!tb[IOAM6_IPTUNNEL_MODE]) - mode = IOAM6_IPTUNNEL_MODE_INLINE; - else - mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]); + mode = nla_get_u8_default(tb[IOAM6_IPTUNNEL_MODE], + IOAM6_IPTUNNEL_MODE_INLINE); if (tb[IOAM6_IPTUNNEL_SRC] && mode == IOAM6_IPTUNNEL_MODE_INLINE) { NL_SET_ERR_MSG(extack, "no tunnel src expected with this mode"); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index eb111d20615c..c134ba202c4c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -198,16 +198,9 @@ static void node_free_immediate(struct net *net, struct fib6_node *fn) net->ipv6.rt6_stats->fib_nodes--; } -static void node_free_rcu(struct rcu_head *head) -{ - struct fib6_node *fn = container_of(head, struct fib6_node, rcu); - - kmem_cache_free(fib6_node_kmem, fn); -} - static void node_free(struct net *net, struct fib6_node *fn) { - call_rcu(&fn->rcu, node_free_rcu); + kfree_rcu(fn, rcu); net->ipv6.rt6_stats->fib_nodes--; } @@ -345,17 +338,17 @@ static void __net_init fib6_tables_init(struct net *net) #endif -unsigned int fib6_tables_seq_read(struct net *net) +unsigned int fib6_tables_seq_read(const struct net *net) { unsigned int h, fib_seq = 0; rcu_read_lock(); for (h = 
0; h < FIB6_TABLE_HASHSZ; h++) { - struct hlist_head *head = &net->ipv6.fib_table_hash[h]; - struct fib6_table *tb; + const struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + const struct fib6_table *tb; hlist_for_each_entry_rcu(tb, head, tb6_hlist) - fib_seq += tb->fib_seq; + fib_seq += READ_ONCE(tb->fib_seq); } rcu_read_unlock(); @@ -400,7 +393,7 @@ int call_fib6_entry_notifiers(struct net *net, .rt = rt, }; - rt->fib6_table->fib_seq++; + WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1); return call_fib6_notifiers(net, event_type, &info.info); } @@ -416,7 +409,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net, .nsiblings = nsiblings, }; - rt->fib6_table->fib_seq++; + WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1); return call_fib6_notifiers(net, event_type, &info.info); } @@ -427,7 +420,7 @@ int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt) .nsiblings = rt->fib6_nsiblings, }; - rt->fib6_table->fib_seq++; + WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1); return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info); } @@ -1190,8 +1183,8 @@ next_iter: while (sibling) { if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->fib6_siblings, - &sibling->fib6_siblings); + list_add_tail_rcu(&rt->fib6_siblings, + &sibling->fib6_siblings); break; } sibling = rcu_dereference_protected(sibling->fib6_next, @@ -1252,7 +1245,7 @@ add: fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); return err; } @@ -1970,7 +1963,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, &rt->fib6_siblings, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } @@ -2500,6 +2493,12 @@ static struct pernet_operations fib6_net_ops = { .exit = fib6_net_exit, }; +static const struct rtnl_msg_handler fib6_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE, + .dumpit = inet6_dump_fib, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, +}; + int __init fib6_init(void) { int ret = -ENOMEM; @@ -2513,9 +2512,7 @@ int __init fib6_init(void) if (ret) goto out_kmem_cache_create; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, - inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED | - RTNL_FLAG_DUMP_SPLIT_NLM_DONE); + ret = rtnl_register_many(fib6_rtnl_msg_handlers); if (ret) goto out_unregister_subsys; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f26841f1490f..f7b4608bb316 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -127,7 +127,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); - if (unlikely(IS_ERR_OR_NULL(neigh))) { + if (IS_ERR_OR_NULL(neigh)) { if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { @@ -1401,8 +1401,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; - sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); - + sock_tx_timestamp(sk, &ipc6->sockc, 
&cork->base.tx_flags); + if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { + cork->base.flags |= IPCORK_TS_OPT_ID; + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; + } cork->base.length = 0; cork->base.transmit_time = ipc6->sockc.transmit_time; @@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk, bool zc = false; u32 tskey = 0; struct rt6_info *rt = dst_rt6_info(cork->dst); - bool paged, hold_tskey, extra_uref = false; + bool paged, hold_tskey = false, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; @@ -1543,10 +1546,15 @@ emsgsize: flags &= ~MSG_SPLICE_PAGES; } - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* * Let's try using as much space as possible. diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b60e13c42bca..48fd53b98972 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -630,8 +630,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } skb_dst_set(skb2, &rt->dst); } else { - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, - skb2->dev) || + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, + ip4h_dscp(eiph), skb2->dev) || skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2ce4ae0d8dc3..d66f58932a79 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -276,7 +276,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack); } -static unsigned int ip6mr_rules_seq_read(struct net *net) +static unsigned int ip6mr_rules_seq_read(const struct net *net) { return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR); } @@ -335,7 +335,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, return 0; } -static unsigned int ip6mr_rules_seq_read(struct net *net) +static unsigned int ip6mr_rules_seq_read(const struct net *net) { return 0; } @@ -1260,11 +1260,9 @@ static int ip6mr_device_event(struct notifier_block *this, return NOTIFY_DONE; } -static unsigned int ip6mr_seq_read(struct net *net) +static unsigned int ip6mr_seq_read(const struct net *net) { - ASSERT_RTNL(); - - return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net); + return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); } static int ip6mr_dump(struct net *net, struct notifier_block *nb, @@ -1369,6 +1367,12 @@ static struct pernet_operations ip6mr_net_ops = { .exit_batch = ip6mr_net_exit_batch, }; +static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, + .msgtype = RTM_GETROUTE, + .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute}, +}; + int __init ip6_mr_init(void) { int err; @@ -1391,9 +1395,8 @@ int __init ip6_mr_init(void) goto add_proto_fail; } #endif - err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE, - ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0); - if (err == 0) + err = rtnl_register_many(ip6mr_rtnl_msg_handlers); + if (!err) return 0; #ifdef CONFIG_IPV6_PIMSM_V2 @@ -1408,9 +1411,9 @@ reg_pernet_fail: return err; } 
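[The ip6mr conversion above follows the same rtnl_register_many() pattern as the fib6, addrconf and addrlabel hunks: the scattered per-message rtnl_register_module() call chains collapse into one const handler table with a single error check. As a reference, a minimal sketch of that pattern under the assumptions visible in these hunks — a struct rtnl_msg_handler with .owner/.protocol/.msgtype/.doit/.dumpit/.flags fields and the paired rtnl_register_many()/rtnl_unregister_many() helpers; the foo_* names are placeholders, not anything from this series:

static const struct rtnl_msg_handler foo_rtnl_msg_handlers[] __initconst_or_module = {
	/* One entry per rtnetlink message type this (hypothetical) module
	 * handles; flags mirror what the old per-call registration passed.
	 */
	{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
	 .doit = foo_rtm_getroute, .dumpit = foo_rtm_dumproute,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED},
};

static int __init foo_init(void)
{
	/* Registers the whole table at once, replacing the chain of
	 * rtnl_register_module() calls and their individual error checks.
	 */
	return rtnl_register_many(foo_rtnl_msg_handlers);
}

static void __exit foo_exit(void)
{
	/* Teardown takes the same table, as ip6_mr_cleanup() does below. */
	rtnl_unregister_many(foo_rtnl_msg_handlers);
}
]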
-void ip6_mr_cleanup(void) +void __init ip6_mr_cleanup(void) { - rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); + rtnl_unregister_many(ip6mr_rtnl_msg_handlers); #ifdef CONFIG_IPV6_PIMSM_V2 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); #endif @@ -2557,7 +2560,7 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, src = nla_get_in6_addr(tb[RTA_SRC]); if (tb[RTA_DST]) grp = nla_get_in6_addr(tb[RTA_DST]); - tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; + tableid = nla_get_u32_default(tb[RTA_TABLE], 0); mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); if (!mrt) { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index f3c8e2d918e1..e087a8e97ba7 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -8,7 +8,14 @@ menu "IPv6: Netfilter Configuration" # old sockopt interface and eval loop config IP6_NF_IPTABLES_LEGACY - tristate + tristate "Legacy IP6 tables support" + depends on INET && IPV6 + select NETFILTER_XTABLES + default n + help + ip6tables is a legacy packet classifier. + This is not needed if you are using iptables over nftables + (iptables-nft). config NF_SOCKET_IPV6 tristate "IPv6 socket lookup support" diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 608fa9d05b55..8476a3944a88 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -629,7 +629,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; - skb_setup_tx_timestamp(skb, sockc->tsflags); + skb_setup_tx_timestamp(skb, sockc); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b4251915585f..63d7681c929f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -374,6 +374,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { struct rt6_info *rt = dst_rt6_info(dst); struct inet6_dev *idev = rt->rt6i_idev; + struct fib6_info *from; if (idev && idev->dev != blackhole_netdev) { struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev); @@ -383,6 +384,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) in6_dev_put(idev); } } + from = unrcu_pointer(xchg(&rt->from, NULL)); + fib6_info_release(from); } static bool __rt6_check_expired(const struct rt6_info *rt) @@ -413,8 +416,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; + struct fib6_info *sibling; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -440,8 +443,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) goto out; - list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, - fib6_siblings) { + list_for_each_entry_rcu(sibling, &match->fib6_siblings, + fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; @@ -1455,7 +1458,6 @@ static DEFINE_SPINLOCK(rt6_exception_lock); static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex) { - struct fib6_info *from; struct net *net; if (!bucket || !rt6_ex) @@ -1467,8 +1469,6 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - 
from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL)); - fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); hlist_del_rcu(&rt6_ex->hlist); @@ -5195,14 +5195,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, * nexthop. Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ + rcu_read_lock(); + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { - rt = list_first_entry(&rt_last->fib6_siblings, - struct fib6_info, - fib6_siblings); + rt = list_first_or_null_rcu(&rt_last->fib6_siblings, + struct fib6_info, + fib6_siblings); } if (rt) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); + + rcu_read_unlock(); } static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) @@ -5547,17 +5551,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i) nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); } else { - struct fib6_info *sibling, *next_sibling; struct fib6_nh *nh = f6i->fib6_nh; + struct fib6_info *sibling; nexthop_len = 0; if (f6i->fib6_nsiblings) { rt6_nh_nlmsg_size(nh, &nexthop_len); - list_for_each_entry_safe(sibling, next_sibling, - &f6i->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); } + + rcu_read_unlock(); } nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } @@ -5721,7 +5729,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; } else if (rt->fib6_nsiblings) { - struct fib6_info *sibling, *next_sibling; + struct fib6_info *sibling; struct nlattr *mp; mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); @@ -5733,14 +5741,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, 0) < 0) goto nla_put_failure; - list_for_each_entry_safe(sibling, next_sibling, - &rt->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &rt->fib6_siblings, + fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, - AF_INET6, 0) < 0) + AF_INET6, 0) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } } + rcu_read_unlock(); + nla_nest_end(skb, mp); } else if (rt->nh) { if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) @@ -6177,7 +6192,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, err = -ENOBUFS; seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); + skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); if (!skb) goto errout; @@ -6190,7 +6205,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, goto errout; } rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, - info->nlh, gfp_any()); + info->nlh, GFP_ATOMIC); return; errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); @@ -6680,6 +6695,15 @@ static void bpf_iter_unregister(void) #endif #endif +static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE, + .doit = inet6_rtm_newroute}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE, + .doit = inet6_rtm_delroute}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE, + .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, +}; + int __init ip6_route_init(void) { int ret; @@ -6722,19 +6746,7 @@ int __init ip6_route_init(void) if (ret) goto fib6_rules_init; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, - inet6_rtm_newroute, NULL, 0); - if (ret < 0) - goto out_register_late_subsys; - - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, - inet6_rtm_delroute, NULL, 0); - if (ret < 0) - goto out_register_late_subsys; - - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, - inet6_rtm_getroute, NULL, - RTNL_FLAG_DOIT_UNLOCKED); + ret = rtnl_register_many(ip6_route_rtnl_msg_handlers); if (ret < 0) goto out_register_late_subsys; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index c74705ead984..ac1dbd492c22 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -954,10 +954,10 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); + enum skb_drop_reason reason; struct seg6_local_lwt *slwt; struct iphdr *iph; __be32 nhaddr; - int err; slwt = seg6_local_lwtunnel(orig_dst->lwtstate); @@ -967,9 +967,9 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk, skb_dst_drop(skb); - err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); - if (err) { - kfree_skb(skb); + reason = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); + if (reason) { + kfree_skb_reason(skb, reason); return -EINVAL; } @@ -1174,8 +1174,8 @@ drop: static int input_action_end_dt4(struct sk_buff *skb, struct seg6_local_lwt *slwt) { + enum skb_drop_reason reason; struct iphdr *iph; - int err; if (!decap_and_validate(skb, IPPROTO_IPIP)) goto drop; @@ -1193,8 +1193,8 @@ static int input_action_end_dt4(struct sk_buff *skb, iph = ip_hdr(skb); - err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev); - if (unlikely(err)) + reason = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev); + if (unlikely(reason)) goto drop; return dst_input(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c9de5ef8f267..2debdf085a3b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -967,6 +967,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 } if (sk) { + /* unconstify the socket only to attach it to buff with care. 
*/ + skb_set_owner_edemux(buff, (struct sock *)sk); + if (sk->sk_state == TCP_TIME_WAIT) mark = inet_twsk(sk)->tw_mark; else @@ -1169,8 +1172,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto out; if (aoh) - key.ao_key = tcp_ao_established_key(ao_info, - aoh->rnext_keyid, -1); + key.ao_key = tcp_ao_established_key(sk, ao_info, + aoh->rnext_keyid, -1); } } if (key.ao_key) { @@ -2175,6 +2178,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; + u8 icsk_pending; int rx_queue; int state; @@ -2183,12 +2187,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS || - icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || - icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + icsk_pending = smp_load_acquire(&icsk->icsk_pending); + if (icsk_pending == ICSK_TIME_RETRANS || + icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0cef8ae5d1ea..d766fd798ecf 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -110,8 +110,19 @@ void udp_v6_rehash(struct sock *sk) u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); + u16 new_hash4; - udp_lib_rehash(sk, new_hash); + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) { + new_hash4 = udp_ehashfn(sock_net(sk), + sk->sk_rcv_saddr, sk->sk_num, + sk->sk_daddr, sk->sk_dport); + } else { + new_hash4 = udp6_ehashfn(sock_net(sk), + &sk->sk_v6_rcv_saddr, sk->sk_num, + &sk->sk_v6_daddr, sk->sk_dport); + } + + udp_lib_rehash(sk, new_hash, new_hash4); } static int compute_score(struct sock *sk, const struct net *net, @@ -216,6 +227,74 @@ rescore: return result; } +#if IS_ENABLED(CONFIG_BASE_SMALL) +static struct sock *udp6_lib_lookup4(const struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned int hnum, int dif, int sdif, + struct udp_table *udptable) +{ + return NULL; +} + +static void udp6_hash4(struct sock *sk) +{ +} +#else /* !CONFIG_BASE_SMALL */ +static struct sock *udp6_lib_lookup4(const struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned int hnum, int dif, int sdif, + struct udp_table *udptable) +{ + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); + const struct hlist_nulls_node *node; + struct udp_hslot *hslot4; + unsigned int hash4, slot; + struct udp_sock *up; + struct sock *sk; + + hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport); + slot = hash4 & udptable->mask; + hslot4 = &udptable->hash4[slot]; + +begin: + udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) { + sk = (struct sock *)up; + if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) + return sk; + } + + /* if the nulls value we got at the end of this lookup is not the + * expected one, we must restart lookup. We probably met an item that + * was moved to another chain due to rehash. 
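+ * The nulls value stored at the end of each chain encodes that
+ * chain's slot index, so ending the walk on an unexpected value is a
+ * reliable sign that we drifted onto another chain.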
+ */ + if (get_nulls_value(node) != slot) + goto begin; + + return NULL; +} + +static void udp6_hash4(struct sock *sk) +{ + struct net *net = sock_net(sk); + unsigned int hash; + + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) { + udp4_hash4(sk); + return; + } + + if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + return; + + hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num, + &sk->sk_v6_daddr, sk->sk_dport); + + udp_lib_hash4(sk, hash); +} +#endif /* CONFIG_BASE_SMALL */ + /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -224,13 +303,19 @@ struct sock *__udp6_lib_lookup(const struct net *net, struct sk_buff *skb) { unsigned short hnum = ntohs(dport); - unsigned int hash2, slot2; struct udp_hslot *hslot2; struct sock *result, *sk; + unsigned int hash2; hash2 = ipv6_portaddr_hash(net, daddr, hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; + hslot2 = udp_hashslot2(udptable, hash2); + + if (udp_has_hash4(hslot2)) { + result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum, + dif, sdif, udptable); + if (result) /* udp6_lib_lookup4 return sk or NULL */ + return result; + } /* Lookup connected or non-wildcard sockets */ result = udp6_lib_lookup2(net, saddr, sport, @@ -257,8 +342,7 @@ struct sock *__udp6_lib_lookup(const struct net *net, /* Lookup wildcard sockets */ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; + hslot2 = udp_hashslot2(udptable, hash2); result = udp6_lib_lookup2(net, saddr, sport, &in6addr_any, hnum, dif, sdif, @@ -859,7 +943,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, udptable->mask; hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udptable->hash2[hash2]; + hslot = &udptable->hash2[hash2].hslot; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } @@ -1065,14 +1149,13 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, { struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(loc_port); - unsigned int hash2, slot2; struct udp_hslot *hslot2; + unsigned int hash2; __portpair ports; struct sock *sk; hash2 = ipv6_portaddr_hash(net, loc_addr, hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; + hslot2 = udp_hashslot2(udptable, hash2); ports = INET_COMBINED_PORTS(rmt_port, hnum); udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { @@ -1169,6 +1252,18 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } +static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + if (!res) + udp6_hash4(sk); + release_sock(sk); + return res; +} + /** * udp6_hwcsum_outgoing - handle outgoing HW checksumming * @sk: socket we are sending on @@ -1764,7 +1859,7 @@ struct proto udpv6_prot = { .owner = THIS_MODULE, .close = udp_lib_close, .pre_connect = udpv6_pre_connect, - .connect = ip6_datagram_connect, + .connect = udpv6_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .init = udpv6_init_sock, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d4118c796290..24aec295a51c 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1584,14 +1584,6 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; } 
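The restart check above is the standard nulls-list pattern: lookups run under RCU only, and a walk that ends on the wrong sentinel must be retried. A minimal, self-contained sketch of the same idiom with the generic hlist_nulls helpers (the item type, table layout, and function names are illustrative, not part of this patch):

#include <linux/rculist_nulls.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

struct item {
	u32 key;
	struct hlist_nulls_node node;
};

/* Caller must hold rcu_read_lock(). Each chain head is assumed to have
 * been initialized with its slot index as the nulls value; ending the
 * walk on a different value means an item we followed was concurrently
 * moved to another chain, so the lookup restarts.
 */
static struct item *item_lookup(struct hlist_nulls_head *table,
				unsigned int mask, u32 key)
{
	const struct hlist_nulls_node *n;
	unsigned int slot = key & mask;
	struct item *it;

begin:
	hlist_nulls_for_each_entry_rcu(it, n, &table[slot], node)
		if (it->key == key)
			return it;

	if (get_nulls_value(n) != slot)
		goto begin;

	return NULL;
}

The point of the nulls encoding is that a lockless reader never needs a lock even to detect the race: the sentinel it lands on carries enough information to decide whether a retry is required.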
-static void free_mux(struct rcu_head *rcu) -{ - struct kcm_mux *mux = container_of(rcu, - struct kcm_mux, rcu); - - kmem_cache_free(kcm_muxp, mux); -} - static void release_mux(struct kcm_mux *mux) { struct kcm_net *knet = mux->knet; @@ -1619,7 +1611,7 @@ static void release_mux(struct kcm_mux *mux) knet->count--; mutex_unlock(&knet->mutex); - call_rcu(&mux->rcu, free_mux); + kfree_rcu(mux, rcu); } static void kcm_done(struct kcm_sock *kcm) diff --git a/net/key/af_key.c b/net/key/af_key.c index f79fb99271ed..c56bb4f451e6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX); if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ } if (!x) - x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX, + proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX); if (x == NULL) return 0; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index fe7eab4b681b..f3fbe5a4395e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -170,28 +170,63 @@ static void sta_rx_agg_reorder_timer_expired(struct timer_list *t) rcu_read_unlock(); } -static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, - const struct ieee80211_addba_ext_ie *req, - u16 buf_size) +void ieee80211_add_addbaext(struct sk_buff *skb, + const u8 req_addba_ext_data, + u16 buf_size) { - struct ieee80211_addba_ext_ie *resp; + struct ieee80211_addba_ext_ie *addba_ext; u8 *pos; pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie)); *pos++ = WLAN_EID_ADDBA_EXT; *pos++ = sizeof(struct ieee80211_addba_ext_ie); - resp = (struct ieee80211_addba_ext_ie *)pos; - resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG; + addba_ext = (struct ieee80211_addba_ext_ie *)pos; - resp->data |= u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT, - IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); + addba_ext->data = IEEE80211_ADDBA_EXT_NO_FRAG; + if (req_addba_ext_data) + addba_ext->data &= req_addba_ext_data; + + addba_ext->data |= + u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT, + IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); +} + +u8 ieee80211_retrieve_addba_ext_data(struct sta_info *sta, + const void *elem_data, ssize_t elem_len, + u16 *buf_size) +{ + struct ieee802_11_elems *elems; + u8 buf_size_1k, data = 0; + + if (!sta->sta.deflink.he_cap.has_he) + return 0; + + if (elem_len <= 0) + return 0; + + elems = ieee802_11_parse_elems(elem_data, elem_len, true, NULL); + + if (elems && !elems->parse_error && elems->addba_ext_ie) { + data = elems->addba_ext_ie->data; + + if (!sta->sta.deflink.eht_cap.has_eht || !buf_size) + goto free; + + buf_size_1k = u8_get_bits(elems->addba_ext_ie->data, + IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); + *buf_size |= (u16)buf_size_1k << + IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT; + 
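+ /* The bits merged above extend the 10-bit buffer-size field of the
+ * ADDBA capability word: EHT peers may use aggregation buffers
+ * larger than the 1023 frames that field can express. */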
} +free: + kfree(elems); + + return data; } static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, u8 dialog_token, u16 status, u16 policy, u16 buf_size, u16 timeout, - const struct ieee80211_addba_ext_ie *addbaext) + const u8 req_addba_ext_data) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; @@ -223,8 +258,8 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - if (sta->sta.deflink.he_cap.has_he && addbaext) - ieee80211_add_addbaext(sdata, skb, addbaext, buf_size); + if (sta->sta.deflink.he_cap.has_he) + ieee80211_add_addbaext(skb, req_addba_ext_data, buf_size); ieee80211_tx_skb(sdata, skb); } @@ -233,7 +268,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext) + const u8 addba_ext_data) { struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; @@ -419,7 +454,7 @@ end: if (tx) ieee80211_send_addba_resp(sta, sta->sta.addr, tid, dialog_token, status, 1, buf_size, - timeout, addbaext); + timeout, addba_ext_data); } void ieee80211_process_addba_request(struct ieee80211_local *local, @@ -428,9 +463,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len) { u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; - struct ieee802_11_elems *elems = NULL; - u8 dialog_token; - int ies_len; + u8 dialog_token, addba_ext_data; /* extract session parameters from addba request frame */ dialog_token = mgmt->u.action.u.addba_req.dialog_token; @@ -443,28 +476,17 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; - ies_len = len - offsetof(struct ieee80211_mgmt, - u.action.u.addba_req.variable); - if (ies_len) { - elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable, - ies_len, true, NULL); - if (!elems || elems->parse_error) - goto free; - } - - if (sta->sta.deflink.eht_cap.has_eht && elems && elems->addba_ext_ie) { - u8 buf_size_1k = u8_get_bits(elems->addba_ext_ie->data, - IEEE80211_ADDBA_EXT_BUF_SIZE_MASK); - - buf_size |= buf_size_1k << IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT; - } + addba_ext_data = + ieee80211_retrieve_addba_ext_data(sta, + mgmt->u.action.u.addba_req.variable, + len - + offsetof(typeof(*mgmt), + u.action.u.addba_req.variable), + &buf_size); __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, - buf_size, true, false, - elems ? elems->addba_ext_ie : NULL); -free: - kfree(elems); + buf_size, true, false, addba_ext_data); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 1c18b862ef8c..61f2cac37728 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -58,23 +58,24 @@ * complete. 
*/ -static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, - const u8 *da, u16 tid, +static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, u8 dialog_token, u16 start_seq_num, u16 agg_size, u16 timeout) { + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u16 capab; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - + skb = dev_alloc_skb(sizeof(*mgmt) + + 2 + sizeof(struct ieee80211_addba_ext_ie) + + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = ieee80211_mgmt_ba(skb, da, sdata); + mgmt = ieee80211_mgmt_ba(skb, sta->sta.addr, sdata); skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); @@ -93,6 +94,9 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); + if (sta->sta.deflink.he_cap.has_he) + ieee80211_add_addbaext(skb, 0, agg_size); + ieee80211_tx_skb_tid(sdata, skb, tid, -1); } @@ -460,8 +464,11 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); - if (sta->sta.deflink.he_cap.has_he) { + if (sta->sta.deflink.eht_cap.has_eht) { buf_size = local->hw.max_tx_aggregation_subframes; + } else if (sta->sta.deflink.he_cap.has_he) { + buf_size = min_t(u16, local->hw.max_tx_aggregation_subframes, + IEEE80211_MAX_AMPDU_BUF_HE); } else { /* * We really should use what the driver told us it will @@ -473,9 +480,8 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, } /* send AddBA request */ - ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, tid_tx->ssn, - buf_size, tid_tx->timeout); + ieee80211_send_addba_request(sta, tid, tid_tx->dialog_token, + tid_tx->ssn, buf_size, tid_tx->timeout); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); } @@ -797,7 +803,7 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) { ieee80211_send_addba_with_timeout(sta, tid_tx); - /* RESPONSE_RECEIVED state whould trigger the flow again */ + /* RESPONSE_RECEIVED state would trigger the flow again */ return; } @@ -970,6 +976,13 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK); buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); + + ieee80211_retrieve_addba_ext_data(sta, + mgmt->u.action.u.addba_resp.variable, + len - offsetof(typeof(*mgmt), + u.action.u.addba_resp.variable), + &buf_size); + buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); txq = sta->sta.txq[tid]; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6dfc61a9acd4..61a824ec33da 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -105,8 +105,11 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, } /* also validate MU-MIMO change */ - monitor_sdata = wiphy_dereference(local->hw.wiphy, - local->monitor_sdata); + if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + monitor_sdata = sdata; + else + monitor_sdata = wiphy_dereference(local->hw.wiphy, + local->monitor_sdata); if (!monitor_sdata && (params->vht_mumimo_groups || params->vht_mumimo_follow_addr)) @@ -114,7 +117,9 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data 
*sdata, /* apply all changes now - no failures allowed */ - if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + if (monitor_sdata && + (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { @@ -138,7 +143,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, } static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, - struct cfg80211_mbssid_config params, + struct cfg80211_mbssid_config *params, struct ieee80211_bss_conf *link_conf) { struct ieee80211_sub_if_data *tx_sdata; @@ -149,10 +154,10 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, link_conf->ema_ap = false; link_conf->bssid_indicator = 0; - if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) + if (sdata->vif.type != NL80211_IFTYPE_AP || !params->tx_wdev) return -EINVAL; - tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev); + tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params->tx_wdev); if (!tx_sdata) return -EINVAL; @@ -161,9 +166,9 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, } else { sdata->vif.mbssid_tx_vif = &tx_sdata->vif; link_conf->nontransmitted = true; - link_conf->bssid_index = params.index; + link_conf->bssid_index = params->index; } - if (params.ema) + if (params->ema) link_conf->ema_ap = true; return 0; @@ -194,6 +199,24 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, } } + /* Let the driver know that an interface is going to be added. + * Indicate so only for interface types that will be added to the + * driver. + */ + switch (type) { + case NL80211_IFTYPE_AP_VLAN: + break; + case NL80211_IFTYPE_MONITOR: + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) || + !(params->flags & MONITOR_FLAG_ACTIVE)) + break; + fallthrough; + default: + drv_prep_add_interface(local, + ieee80211_vif_type_p2p(&sdata->vif)); + break; + } + return wdev; } @@ -879,6 +902,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_set_monitor_channel(struct wiphy *wiphy, + struct net_device *dev, struct cfg80211_chan_def *chandef) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -888,22 +912,25 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, lockdep_assert_wiphy(local->hw.wiphy); - if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, - &chanreq.oper)) - return 0; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { + if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, + &chanreq.oper)) + return 0; - sdata = wiphy_dereference(local->hw.wiphy, - local->monitor_sdata); - if (!sdata) - goto done; + sdata = wiphy_dereference(wiphy, local->monitor_sdata); + if (!sdata) + goto done; + } - if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, + if (rcu_access_pointer(sdata->deflink.conf->chanctx_conf) && + cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, &chanreq.oper)) return 0; ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq, - IEEE80211_CHANCTX_EXCLUSIVE); + IEEE80211_CHANCTX_SHARED); if (ret) return ret; done: @@ -1294,9 +1321,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (old) return -EALREADY; - if (params->smps_mode != NL80211_SMPS_OFF) - return -EOPNOTSUPP; - link->smps_mode = 
IEEE80211_SMPS_OFF; link->needed_rx_chains = sdata->local->rx_chains; @@ -1390,7 +1414,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_AP && params->mbssid_config.tx_wdev) { err = ieee80211_set_ap_mbssid_options(sdata, - params->mbssid_config, + &params->mbssid_config, link_conf); if (err) return err; @@ -1705,7 +1729,7 @@ static int sta_apply_auth_flags(struct ieee80211_local *local, * before drv_sta_state() is called. */ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) - rate_control_rate_init(sta); + rate_control_rate_init_all_links(sta); ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (ret) @@ -2134,7 +2158,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && test_sta_flag(sta, WLAN_STA_ASSOC)) - rate_control_rate_init(sta); + rate_control_rate_init_all_links(sta); return sta_info_insert(sta); } @@ -3046,14 +3070,31 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, enum nl80211_tx_power_setting txp_type = type; bool update_txp_type = false; bool has_monitor = false; + int user_power_level; int old_power = local->user_power_level; lockdep_assert_wiphy(local->hw.wiphy); + switch (type) { + case NL80211_TX_POWER_AUTOMATIC: + user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; + break; + case NL80211_TX_POWER_LIMITED: + case NL80211_TX_POWER_FIXED: + if (mbm < 0 || (mbm % 100)) + return -EOPNOTSUPP; + user_power_level = MBM_TO_DBM(mbm); + break; + default: + return -EINVAL; + } + if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) return -EOPNOTSUPP; @@ -3063,57 +3104,67 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, return -EOPNOTSUPP; } - switch (type) { - case NL80211_TX_POWER_AUTOMATIC: - sdata->deflink.user_power_level = - IEEE80211_UNSET_POWER_LEVEL; - txp_type = NL80211_TX_POWER_LIMITED; - break; - case NL80211_TX_POWER_LIMITED: - case NL80211_TX_POWER_FIXED: - if (mbm < 0 || (mbm % 100)) - return -EOPNOTSUPP; - sdata->deflink.user_power_level = MBM_TO_DBM(mbm); - break; - } + for (int link_id = 0; + link_id < ARRAY_SIZE(sdata->link); + link_id++) { + struct ieee80211_link_data *link = + wiphy_dereference(wiphy, sdata->link[link_id]); - if (txp_type != sdata->vif.bss_conf.txpower_type) { - update_txp_type = true; - sdata->vif.bss_conf.txpower_type = txp_type; - } + if (!link) + continue; - ieee80211_recalc_txpower(sdata, update_txp_type); + link->user_power_level = user_power_level; + + if (txp_type != link->conf->txpower_type) { + update_txp_type = true; + link->conf->txpower_type = txp_type; + } + ieee80211_recalc_txpower(link, update_txp_type); + } return 0; } - switch (type) { - case NL80211_TX_POWER_AUTOMATIC: - local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; - txp_type = NL80211_TX_POWER_LIMITED; - break; - case NL80211_TX_POWER_LIMITED: - case NL80211_TX_POWER_FIXED: - if (mbm < 0 || (mbm % 100)) - return -EOPNOTSUPP; - local->user_power_level = MBM_TO_DBM(mbm); - break; - } + local->user_power_level = user_power_level; list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { has_monitor = true; 
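/* the virtual monitor interface is updated once, after these loops */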
continue; } - sdata->deflink.user_power_level = local->user_power_level; - if (txp_type != sdata->vif.bss_conf.txpower_type) - update_txp_type = true; - sdata->vif.bss_conf.txpower_type = txp_type; + + for (int link_id = 0; + link_id < ARRAY_SIZE(sdata->link); + link_id++) { + struct ieee80211_link_data *link = + wiphy_dereference(wiphy, sdata->link[link_id]); + + if (!link) + continue; + + link->user_power_level = local->user_power_level; + if (txp_type != link->conf->txpower_type) + update_txp_type = true; + link->conf->txpower_type = txp_type; + } } list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; - ieee80211_recalc_txpower(sdata, update_txp_type); + + for (int link_id = 0; + link_id < ARRAY_SIZE(sdata->link); + link_id++) { + struct ieee80211_link_data *link = + wiphy_dereference(wiphy, sdata->link[link_id]); + + if (!link) + continue; + + ieee80211_recalc_txpower(link, update_txp_type); + } } if (has_monitor) { @@ -3125,7 +3176,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, update_txp_type = true; sdata->vif.bss_conf.txpower_type = txp_type; - ieee80211_recalc_txpower(sdata, update_txp_type); + ieee80211_recalc_txpower(&sdata->deflink, + update_txp_type); } } @@ -4307,7 +4359,8 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, if (chanctx_conf) { *chandef = link->conf->chanreq.oper; ret = 0; - } else if (local->open_count > 0 && + } else if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) && + local->open_count > 0 && local->open_count == local->monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) { *chandef = local->monitor_chanreq.oper; @@ -5030,6 +5083,13 @@ ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, return ret; } + if (test_sta_flag(sta, WLAN_STA_ASSOC)) { + struct link_sta_info *link_sta; + + link_sta = sdata_dereference(sta->link[params->link_id], sdata); + rate_control_rate_init(link_sta); + } + /* ieee80211_sta_activate_link frees the link upon failure */ return ieee80211_sta_activate_link(sta, params->link_id); } diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index cca6d14084d2..a442cb667520 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -323,22 +323,34 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, continue; switch (link->sdata->vif.type) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - width = ieee80211_get_max_required_bw(link); - break; case NL80211_IFTYPE_STATION: + if (!link->sdata->vif.cfg.assoc) { + /* + * The AP's sta->bandwidth may not yet be set + * at this point (pre-association), so simply + * take the width from the chandef. We cannot + * have TDLS peers yet (only after association). 
+ */ + width = link->conf->chanreq.oper.width; + break; + } /* - * The ap's sta->bandwidth is not set yet at this - * point, so take the width from the chandef, but - * account also for TDLS peers + * otherwise just use min_def like in AP, depending on what + * we currently think the AP STA (and possibly TDLS peers) + * require(s) */ - width = max(link->conf->chanreq.oper.width, - ieee80211_get_max_required_bw(link)); + fallthrough; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + width = ieee80211_get_max_required_bw(link); break; case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: continue; + case NL80211_IFTYPE_MONITOR: + WARN_ON_ONCE(!ieee80211_hw_check(&local->hw, + NO_VIRTUAL_MONITOR)); + fallthrough; case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: @@ -347,7 +359,6 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: - case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: WARN_ON_ONCE(1); @@ -409,7 +420,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, if (!ctx->driver_present) return 0; - return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH; + return IEEE80211_CHANCTX_CHANGE_MIN_DEF; } static void ieee80211_chan_bw_change(struct ieee80211_local *local, @@ -462,12 +473,12 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, continue; /* vif changed to narrow BW and narrow BW for station wasn't - * requested or vise versa */ + * requested or vice versa */ if ((new_sta_bw < link_sta->pub->bandwidth) == !narrowed) continue; link_sta->pub->bandwidth = new_sta_bw; - rate_control_rate_update(local, sband, sta, link_id, + rate_control_rate_update(local, sband, link_sta, IEEE80211_RC_BW_CHANGED); } } @@ -905,7 +916,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, } if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { - ieee80211_recalc_txpower(sdata, false); + ieee80211_recalc_txpower(link, false); ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL, false); } @@ -956,6 +967,10 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, if (!link->sdata->u.mgd.associated) continue; break; + case NL80211_IFTYPE_MONITOR: + if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + continue; + break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: @@ -968,6 +983,11 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf) continue; + if (link->sdata->vif.type == NL80211_IFTYPE_MONITOR) { + rx_chains_dynamic = rx_chains_static = local->rx_chains; + break; + } + switch (link->smps_mode) { default: WARN_ONCE(1, "Invalid SMPS mode %d\n", @@ -1118,7 +1138,7 @@ ieee80211_replace_chanctx(struct ieee80211_local *local, * * Consider ctx1..3, link1..6, each ctx has 2 links. link1 and * link2 from ctx1 request new different chandefs starting 2 - * in-place reserations with ctx4 and ctx5 replacing ctx1 and + * in-place reservations with ctx4 and ctx5 replacing ctx1 and * ctx2 respectively. Next link5 and link6 from ctx3 reserve * ctx4. If link3 and link4 remain on ctx2 as they are then this * fails unless `replace_ctx` from ctx5 is replaced with ctx3. 
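The ieee80211_chan_bw_change() hunk above hinges on a compact predicate: (new_sta_bw < link_sta->pub->bandwidth) == !narrowed. Since the helper runs twice per channel-context update (a "narrowed" pass before the context shrinks, a second pass after it grows), a distilled sketch of the skip condition, with illustrative names, may help:

#include <linux/types.h>

/* Return true when a station is left for the other pass: the pass run
 * before the context narrows handles only stations whose bandwidth
 * decreases, and the pass run afterwards handles only stations whose
 * bandwidth increases. This ordering keeps every station's bandwidth
 * within the context's width at all times.
 */
static bool chan_bw_change_skip(bool narrowed, bool sta_gets_narrower)
{
	return sta_gets_narrower != narrowed;
}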
@@ -1169,7 +1189,7 @@ ieee80211_replace_chanctx(struct ieee80211_local *local, static bool ieee80211_find_available_radio(struct ieee80211_local *local, const struct ieee80211_chan_req *chanreq, - int *radio_idx) + u32 radio_mask, int *radio_idx) { struct wiphy *wiphy = local->hw.wiphy; const struct wiphy_radio *radio; @@ -1180,6 +1200,9 @@ ieee80211_find_available_radio(struct ieee80211_local *local, return true; for (i = 0; i < wiphy->n_radio; i++) { + if (!(radio_mask & BIT(i))) + continue; + radio = &wiphy->radio[i]; if (!cfg80211_radio_chandef_valid(radio, &chanreq->oper)) continue; @@ -1213,7 +1236,9 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode); if (!new_ctx) { if (ieee80211_can_create_new_chanctx(local, -1) && - ieee80211_find_available_radio(local, chanreq, &radio_idx)) + ieee80211_find_available_radio(local, chanreq, + sdata->wdev.radio_mask, + &radio_idx)) new_ctx = ieee80211_new_chanctx(local, chanreq, mode, false, radio_idx); else @@ -1712,7 +1737,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) link, changed); - ieee80211_recalc_txpower(sdata, false); + ieee80211_recalc_txpower(link, false); } ieee80211_recalc_chanctx_chantype(local, ctx); @@ -1883,7 +1908,9 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, /* Note: context is now reserved */ if (ctx) reserved = true; - else if (!ieee80211_find_available_radio(local, chanreq, &radio_idx)) + else if (!ieee80211_find_available_radio(local, chanreq, + sdata->wdev.radio_mask, + &radio_idx)) ctx = ERR_PTR(-EBUSY); else ctx = ieee80211_new_chanctx(local, chanreq, mode, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 02b5476a4376..be2e486907f9 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -42,9 +42,8 @@ static ssize_t name## _read(struct file *file, char __user *userbuf, \ } #define DEBUGFS_READONLY_FILE_OPS(name) \ -static const struct file_operations name## _ops = { \ +static const struct debugfs_short_fops name## _ops = { \ .read = name## _read, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ }; @@ -142,10 +141,9 @@ static ssize_t aqm_write(struct file *file, return -EINVAL; } -static const struct file_operations aqm_ops = { +static const struct debugfs_short_fops aqm_ops = { .write = aqm_write, .read = aqm_read, - .open = simple_open, .llseek = default_llseek, }; @@ -194,10 +192,9 @@ static ssize_t airtime_flags_write(struct file *file, return count; } -static const struct file_operations airtime_flags_ops = { +static const struct debugfs_short_fops airtime_flags_ops = { .write = airtime_flags_write, .read = airtime_flags_read, - .open = simple_open, .llseek = default_llseek, }; @@ -225,9 +222,8 @@ static ssize_t aql_pending_read(struct file *file, buf, len); } -static const struct file_operations aql_pending_ops = { +static const struct debugfs_short_fops aql_pending_ops = { .read = aql_pending_read, - .open = simple_open, .llseek = default_llseek, }; @@ -305,10 +301,9 @@ static ssize_t aql_txq_limit_write(struct file *file, return count; } -static const struct file_operations aql_txq_limit_ops = { +static const struct debugfs_short_fops aql_txq_limit_ops = { .write = aql_txq_limit_write, .read = aql_txq_limit_read, - .open = simple_open, .llseek = default_llseek, }; @@ -355,10 +350,9 @@ static ssize_t aql_enable_write(struct file *file, const char __user *user_buf, return count; } -static const struct file_operations aql_enable_ops = 
{ +static const struct debugfs_short_fops aql_enable_ops = { .write = aql_enable_write, .read = aql_enable_read, - .open = simple_open, .llseek = default_llseek, }; @@ -406,10 +400,9 @@ static ssize_t force_tx_status_write(struct file *file, return count; } -static const struct file_operations force_tx_status_ops = { +static const struct debugfs_short_fops force_tx_status_ops = { .write = force_tx_status_write, .read = force_tx_status_read, - .open = simple_open, .llseek = default_llseek, }; @@ -434,9 +427,8 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf, return count; } -static const struct file_operations reset_ops = { +static const struct debugfs_short_fops reset_ops = { .write = reset_write, - .open = simple_open, .llseek = noop_llseek, }; #endif @@ -456,6 +448,7 @@ static const char *hw_flag_names[] = { FLAG(SUPPORTS_DYNAMIC_PS), FLAG(MFP_CAPABLE), FLAG(WANT_MONITOR_VIF), + FLAG(NO_VIRTUAL_MONITOR), FLAG(NO_AUTO_VIF), FLAG(SW_CRYPTO_CONTROL), FLAG(SUPPORT_FAST_XMIT), @@ -623,9 +616,8 @@ static ssize_t stats_ ##name## _read(struct file *file, \ print_devstats_##name); \ } \ \ -static const struct file_operations stats_ ##name## _ops = { \ +static const struct debugfs_short_fops stats_ ##name## _ops = { \ .read = stats_ ##name## _read, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ }; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 7e54da508765..b3a64edea0f2 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -26,17 +26,15 @@ static ssize_t key_##name##_read(struct file *file, \ #define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n") #define KEY_OPS(name) \ -static const struct file_operations key_ ##name## _ops = { \ +static const struct debugfs_short_fops key_ ##name## _ops = { \ .read = key_##name##_read, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ } #define KEY_OPS_W(name) \ -static const struct file_operations key_ ##name## _ops = { \ +static const struct debugfs_short_fops key_ ##name## _ops = { \ .read = key_##name##_read, \ .write = key_##name##_write, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ } @@ -49,9 +47,8 @@ static const struct file_operations key_ ##name## _ops = { \ #define KEY_CONF_READ_D(name) KEY_CONF_READ(name, "%d\n") #define KEY_CONF_OPS(name) \ -static const struct file_operations key_ ##name## _ops = { \ +static const struct debugfs_short_fops key_ ##name## _ops = { \ .read = key_conf_##name##_read, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 68596ef78b15..a9bc2fd59f55 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -221,10 +221,9 @@ static ssize_t ieee80211_if_fmt_##name( \ } #define _IEEE80211_IF_FILE_OPS(name, _read, _write) \ -static const struct file_operations name##_ops = { \ +static const struct debugfs_short_fops name##_ops = { \ .read = (_read), \ .write = (_write), \ - .open = simple_open, \ .llseek = generic_file_llseek, \ } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 1e9389c49a57..a67a9d316008 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -30,17 +30,15 @@ static ssize_t sta_ ##name## _read(struct file *file, \ #define STA_READ_D(name, field) STA_READ(name, field, "%d\n") #define STA_OPS(name) \ -static const struct file_operations sta_ ##name## _ops = { \ +static const struct debugfs_short_fops sta_ ##name## _ops = { \ .read = 
sta_##name##_read, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ } #define STA_OPS_RW(name) \ -static const struct file_operations sta_ ##name## _ops = { \ +static const struct debugfs_short_fops sta_ ##name## _ops = { \ .read = sta_##name##_read, \ .write = sta_##name##_write, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ } @@ -450,9 +448,8 @@ STA_OPS_RW(agg_status); /* link sta attributes */ #define LINK_STA_OPS(name) \ -static const struct file_operations link_sta_ ##name## _ops = { \ +static const struct debugfs_short_fops link_sta_ ##name## _ops = { \ .read = link_sta_##name##_read, \ - .open = simple_open, \ .llseek = generic_file_llseek, \ } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index fe868b521622..299d38e9e863 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -65,6 +65,7 @@ int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) && !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; @@ -181,9 +182,10 @@ int drv_sta_set_txpwr(struct ieee80211_local *local, return ret; } -void drv_sta_rc_update(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, u32 changed) +void drv_link_sta_rc_update(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_sta *link_sta, + u32 changed) { sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -193,10 +195,10 @@ void drv_sta_rc_update(struct ieee80211_local *local, (sdata->vif.type != NL80211_IFTYPE_ADHOC && sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); - trace_drv_sta_rc_update(local, sdata, sta, changed); - if (local->ops->sta_rc_update) - local->ops->sta_rc_update(&local->hw, &sdata->vif, - sta, changed); + trace_drv_link_sta_rc_update(local, sdata, link_sta, changed); + if (local->ops->link_sta_rc_update) + local->ops->link_sta_rc_update(&local->hw, &sdata->vif, + link_sta, changed); trace_drv_return_void(local); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index d382d9729e85..edd1e4d4ad9d 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -594,9 +594,9 @@ int drv_sta_set_txpwr(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct sta_info *sta); -void drv_sta_rc_update(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, u32 changed); +void drv_link_sta_rc_update(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_sta *link_sta, u32 changed); static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1728,4 +1728,16 @@ drv_can_neg_ttlm(struct ieee80211_local *local, return res; } + +static inline void +drv_prep_add_interface(struct ieee80211_local *local, + enum nl80211_iftype type) +{ + trace_drv_prep_add_interface(local, type); + if (local->ops->prep_add_interface) + local->ops->prep_add_interface(&local->hw, type); + + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index ddc7acc68335..7a3116c36df9 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -2,7 +2,7 @@ /* * EHT handling * - * Copyright(c) 2021-2023 Intel Corporation + * Copyright(c) 2021-2024 Intel 
Corporation */ #include "ieee80211_i.h" @@ -75,4 +75,23 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + + switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0], + IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) { + case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454: + link_sta->pub->agg.max_amsdu_len = + IEEE80211_MAX_MPDU_LEN_VHT_11454; + break; + case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991: + link_sta->pub->agg.max_amsdu_len = + IEEE80211_MAX_MPDU_LEN_VHT_7991; + break; + case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895: + default: + link_sta->pub->agg.max_amsdu_len = + IEEE80211_MAX_MPDU_LEN_VHT_3895; + break; + } + + ieee80211_sta_recalc_aggregates(&link_sta->sta->sta); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 1c2b7dd8976a..32390d8a9d75 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -379,7 +379,7 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) sta->ampdu_mlme.tid_rx_manage_offl)) __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, IEEE80211_MAX_AMPDU_BUF_HT, - false, true, NULL); + false, true, 0); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 3f74bbceeca5..a1b4178deccf 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -569,7 +569,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) if (!sta->sdata->u.ibss.control_port) sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED); - rate_control_rate_init(sta); + rate_control_rate_init(&sta->deflink); /* If it fails, maybe we raced another insertion? */ if (sta_info_insert_rcu(sta)) @@ -1068,11 +1068,12 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, /* Force rx_nss recalculation */ sta->sta.deflink.rx_nss = 0; - rate_control_rate_init(sta); + rate_control_rate_init(&sta->deflink); if (sta->sta.deflink.rx_nss != rx_nss) changed |= IEEE80211_RC_NSS_CHANGED; - drv_sta_rc_update(local, sdata, &sta->sta, changed); + drv_link_sta_rc_update(local, sdata, &sta->sta.deflink, + changed); } rcu_read_unlock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3d3c9139ff5e..a00096dd787b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1016,8 +1016,6 @@ struct ieee80211_link_data_managed { int wmm_last_param_set; int mu_edca_last_param_set; - - u8 bss_param_ch_cnt; }; struct ieee80211_link_data_ap { @@ -1371,7 +1369,7 @@ struct ieee80211_local { spinlock_t queue_stop_reason_lock; int open_count; - int monitors, cooked_mntrs; + int monitors, cooked_mntrs, tx_mntrs; /* number of interfaces with corresponding FIF_ flags */ int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll, fif_probe_req; @@ -2037,8 +2035,8 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata); int ieee80211_add_virtual_monitor(struct ieee80211_local *local); void ieee80211_del_virtual_monitor(struct ieee80211_local *local); -bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, +bool __ieee80211_recalc_txpower(struct ieee80211_link_data *link); +void ieee80211_recalc_txpower(struct ieee80211_link_data *link, bool update_bss); void ieee80211_recalc_offload(struct ieee80211_local *local); @@ -2115,14 +2113,19 @@ int ieee80211_send_smps_action(struct 
ieee80211_sub_if_data *sdata, const u8 *bssid, int link_id); bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old, enum ieee80211_smps_mode smps_mode_new); - +void ieee80211_add_addbaext(struct sk_buff *skb, + const u8 req_addba_ext_data, + u16 buf_size); +u8 ieee80211_retrieve_addba_ext_data(struct sta_info *sta, + const void *elem_data, ssize_t elem_len, + u16 *buf_size); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext); + const u8 addba_ext_data); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, @@ -2198,8 +2201,6 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta) return _ieee80211_sta_cur_vht_bw(link_sta, NULL); } void ieee80211_sta_init_nss(struct link_sta_info *link_sta); -enum ieee80211_sta_rx_bandwidth -ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta); void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, @@ -2454,7 +2455,7 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) /* * If quiescing is set, we are racing with __ieee80211_suspend. * __ieee80211_suspend flushes the workers after setting quiescing, - * and we check quiescing / suspended before enqueing new workers. + * and we check quiescing / suspended before enqueuing new workers. * We should abort the worker to avoid the races below. 
*/ if (local->quiescing) @@ -2547,8 +2548,8 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata); -u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef); -u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, +u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef); +u8 *ieee80211_ie_build_eht_oper(u8 *pos, const struct cfg80211_chan_def *chandef, const struct ieee80211_sta_eht_cap *eht_cap); int ieee80211_parse_bitrates(enum nl80211_chan_width width, const struct ieee80211_supported_band *sband, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6ef0990d3d29..a8fbedd530f4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -44,13 +44,13 @@ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work); -bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) +bool __ieee80211_recalc_txpower(struct ieee80211_link_data *link) { struct ieee80211_chanctx_conf *chanctx_conf; int power; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); + chanctx_conf = rcu_dereference(link->conf->chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); return false; @@ -59,27 +59,26 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) power = ieee80211_chandef_max_power(&chanctx_conf->def); rcu_read_unlock(); - if (sdata->deflink.user_power_level != IEEE80211_UNSET_POWER_LEVEL) - power = min(power, sdata->deflink.user_power_level); + if (link->user_power_level != IEEE80211_UNSET_POWER_LEVEL) + power = min(power, link->user_power_level); - if (sdata->deflink.ap_power_level != IEEE80211_UNSET_POWER_LEVEL) - power = min(power, sdata->deflink.ap_power_level); + if (link->ap_power_level != IEEE80211_UNSET_POWER_LEVEL) + power = min(power, link->ap_power_level); - if (power != sdata->vif.bss_conf.txpower) { - sdata->vif.bss_conf.txpower = power; - ieee80211_hw_config(sdata->local, 0); + if (power != link->conf->txpower) { + link->conf->txpower = power; return true; } return false; } -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, +void ieee80211_recalc_txpower(struct ieee80211_link_data *link, bool update_bss) { - if (__ieee80211_recalc_txpower(sdata) || - (update_bss && ieee80211_sdata_running(sdata))) - ieee80211_link_info_change_notify(sdata, &sdata->deflink, + if (__ieee80211_recalc_txpower(link) || + (update_bss && ieee80211_sdata_running(link->sdata))) + ieee80211_link_info_change_notify(link->sdata, link, BSS_CHANGED_TXPOWER); } @@ -279,8 +278,13 @@ static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata, ret = eth_mac_addr(sdata->dev, sa); if (ret == 0) { - memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); - ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); + if (check_dup) { + memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); + ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); + } else { + memset(sdata->vif.addr, 0, ETH_ALEN); + memset(sdata->vif.bss_conf.addr, 0, ETH_ALEN); + } } /* Regardless of eth_mac_addr() return we still want to add the @@ -699,9 +703,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do ieee80211_recalc_idle(local); ieee80211_recalc_offload(local); - if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) + if 
(!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) break; + ieee80211_link_release_channel(&sdata->deflink); fallthrough; default: if (!going_down) @@ -1087,6 +1093,8 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, ADJUST(CONTROL, control); ADJUST(CONTROL, pspoll); ADJUST(OTHER_BSS, other_bss); + if (!(flags & MONITOR_FLAG_SKIP_TX)) + local->tx_mntrs += offset; #undef ADJUST } @@ -1131,7 +1139,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); - if (local->monitor_sdata) + if (local->monitor_sdata || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); @@ -1193,6 +1202,9 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + return; + ASSERT_RTNL(); lockdep_assert_wiphy(local->hw.wiphy); @@ -1328,7 +1340,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } - if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { res = drv_add_interface(local, sdata); if (res) goto err_stop; @@ -2176,9 +2189,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_set_default_queues(sdata); - sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL; - sdata->deflink.user_power_level = local->user_power_level; - /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 46092fbcde90..58a76bcd6ae6 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -36,6 +36,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, link->conf = link_conf; link_conf->link_id = link_id; link_conf->vif = &sdata->vif; + link->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; + link->user_power_level = sdata->local->user_power_level; + link_conf->txpower = INT_MIN; wiphy_work_init(&link->csa.finalize_work, ieee80211_csa_finalize_work); @@ -386,6 +389,37 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, jiffies); } + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + struct ieee80211_link_data *link; + + link = sdata_dereference(sdata->link[link_id], sdata); + + /* + * This call really should not fail. Unfortunately, it appears + * that this may happen occasionally with some drivers. Should + * it happen, we are stuck in a bad place as going backwards is + * not really feasible. + * + * So lets just tell link_use_channel that it must not fail to + * assign the channel context (from mac80211's perspective) and + * assume the driver is going to trigger a recovery flow if it + * had a failure. + * That really is not great nor guaranteed to work. But at least + * the internal mac80211 state remains consistent and there is + * a chance that we can recover. 
+ */ + ret = _ieee80211_link_use_channel(link, + &link->conf->chanreq, + IEEE80211_CHANCTX_SHARED, + true); + WARN_ON_ONCE(ret); + + /* + * inform about the link info changed parameters after all + * stations are also added + */ + } + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata) continue; @@ -429,26 +463,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, link = sdata_dereference(sdata->link[link_id], sdata); - /* - * This call really should not fail. Unfortunately, it appears - * that this may happen occasionally with some drivers. Should - * it happen, we are stuck in a bad place as going backwards is - * not really feasible. - * - * So lets just tell link_use_channel that it must not fail to - * assign the channel context (from mac80211's perspective) and - * assume the driver is going to trigger a recovery flow if it - * had a failure. - * That really is not great nor guaranteed to work. But at least - * the internal mac80211 state remains consistent and there is - * a chance that we can recover. - */ - ret = _ieee80211_link_use_channel(link, - &link->conf->chanreq, - IEEE80211_CHANCTX_SHARED, - true); - WARN_ON_ONCE(ret); - ieee80211_mgd_set_link_qos_params(link); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_ERP_CTS_PROT | diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 640239f4425b..cb5f16366b9c 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1482,7 +1482,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (!elems) return; - /* ignore non-mesh or secure / unsecure mismatch */ + /* ignore non-mesh or secure / insecure mismatch */ if ((!elems->mesh_id || !elems->mesh_config) || (elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) || (!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 579d0f24ac9d..4e9546e998b6 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -220,12 +220,12 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, /** * mesh_path_error_tx - Sends a PERR mesh management frame * + * @sdata: local mesh subif * @ttl: allowed remaining hops * @target: broken destination * @target_sn: SN of the broken destination * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to - * @sdata: local mesh subif * * Note: This function may be called with driver locks taken that the driver * also acquires in the TX path. To avoid a deadlock we don't transmit the @@ -1137,8 +1137,8 @@ enddiscovery: /** * mesh_nexthop_resolve - lookup next hop; conditionally start path discovery * - * @skb: 802.11 frame to be sent * @sdata: network subif the frame will be sent through + * @skb: 802.11 frame to be sent * * Lookup next hop for given skb and start path discovery if no * forwarding information is found. @@ -1245,8 +1245,8 @@ void mesh_path_refresh(struct ieee80211_sub_if_data *sdata, * this function is considered "using" the associated mpath, so preempt a path * refresh if this mpath expires soon. * - * @skb: 802.11 frame to be sent * @sdata: network subif the frame will be sent through + * @skb: 802.11 frame to be sent * * Returns: 0 if the next hop was found. Nonzero otherwise. 
*/ diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 30c0d89203af..9f9cb5af0a97 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -300,8 +300,8 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index - * @idx: index * @sdata: local subif, or NULL for all entries + * @idx: index * * Returns: pointer to the mesh path structure, or NULL if not found. * @@ -315,8 +315,8 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) /** * mpp_path_lookup_by_idx - look up a path in the proxy path table by its index - * @idx: index * @sdata: local subif, or NULL for all entries + * @idx: index * * Returns: pointer to the proxy path structure, or NULL if not found. * @@ -670,8 +670,8 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, /** * mesh_path_add - allocate and add a new path to the mesh path table - * @dst: destination address of the path (ETH_ALEN length) * @sdata: local subif + * @dst: destination address of the path (ETH_ALEN length) * * Returns: 0 on success * @@ -916,8 +916,8 @@ static int table_path_del(struct mesh_table *tbl, /** * mesh_path_del - delete a mesh path from the table * - * @addr: dst address (ETH_ALEN length) * @sdata: local subif + * @addr: dst address (ETH_ALEN length) * * Returns: 0 if successful */ @@ -996,8 +996,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) /** * mesh_path_discard_frame - discard a frame whose path could not be resolved * - * @skb: frame to discard * @sdata: network subif the frame was to be sent through + * @skb: frame to discard * * Locking: the function must me called within a rcu_read_lock region */ diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 8f2b492a9fe9..6ea35c88dc48 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -486,10 +486,11 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20; } + /* FIXME: this check is wrong without SW rate control */ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) - rate_control_rate_init(sta); + rate_control_rate_init(&sta->deflink); else - rate_control_rate_update(local, sband, sta, 0, changed); + rate_control_rate_update(local, sband, &sta->deflink, changed); out: spin_unlock_bh(&sta->mesh->plink_lock); } @@ -667,7 +668,7 @@ void mesh_plink_timer(struct timer_list *t) /* * This STA is valid because sta_info_destroy() will * del_timer_sync() this timer after having made sure - * it cannot be readded (by deleting the plink.) + * it cannot be re-added (by deleting the plink.) 
*/ sta = mesh->plink_sta; diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 8cf3f395f52f..3a66b4cefca7 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -175,7 +175,7 @@ static void mesh_sync_offset_adjust_tsf(struct ieee80211_sub_if_data *sdata, spin_lock_bh(&ifmsh->sync_offset_lock); if (ifmsh->sync_offset_clockdrift_max > TOFFSET_MINIMUM_ADJUSTMENT) { - /* Since ajusting the tsf here would + /* Since adjusting the tsf here would * require a possibly blocking call * to the driver tsf setter, we punt * the tsf adjustment to the mesh tasklet diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0303972c23e4..480b664151c9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -31,6 +31,8 @@ #include "led.h" #include "fils_aead.h" +#include <kunit/static_stub.h> + #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) #define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) @@ -2643,9 +2645,91 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, &ifmgd->csa_connection_drop_work); } +struct sta_bss_param_ch_cnt_data { + struct ieee80211_sub_if_data *sdata; + u8 reporting_link_id; + u8 mld_id; +}; + +static enum cfg80211_rnr_iter_ret +ieee80211_sta_bss_param_ch_cnt_iter(void *_data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len) +{ + struct sta_bss_param_ch_cnt_data *data = _data; + struct ieee80211_sub_if_data *sdata = data->sdata; + const struct ieee80211_tbtt_info_ge_11 *ti; + u8 bss_param_ch_cnt; + int link_id; + + if (type != IEEE80211_TBTT_INFO_TYPE_TBTT) + return RNR_ITER_CONTINUE; + + if (tbtt_info_len < sizeof(*ti)) + return RNR_ITER_CONTINUE; + + ti = (const void *)tbtt_info; + + if (ti->mld_params.mld_id != data->mld_id) + return RNR_ITER_CONTINUE; + + link_id = le16_get_bits(ti->mld_params.params, + IEEE80211_RNR_MLD_PARAMS_LINK_ID); + bss_param_ch_cnt = + le16_get_bits(ti->mld_params.params, + IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT); + + if (bss_param_ch_cnt != 255 && + link_id < ARRAY_SIZE(sdata->link)) { + struct ieee80211_link_data *link = + sdata_dereference(sdata->link[link_id], sdata); + + if (link && link->conf->bss_param_ch_cnt != bss_param_ch_cnt) { + link->conf->bss_param_ch_cnt = bss_param_ch_cnt; + link->conf->bss_param_ch_cnt_link_id = + data->reporting_link_id; + } + } + + return RNR_ITER_CONTINUE; +} + +static void +ieee80211_mgd_update_bss_param_ch_cnt(struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *bss_conf, + struct ieee802_11_elems *elems) +{ + struct sta_bss_param_ch_cnt_data data = { + .reporting_link_id = bss_conf->link_id, + .sdata = sdata, + }; + int bss_param_ch_cnt; + + if (!elems->ml_basic) + return; + + data.mld_id = ieee80211_mle_get_mld_id((const void *)elems->ml_basic); + + cfg80211_iter_rnr(elems->ie_start, elems->total_len, + ieee80211_sta_bss_param_ch_cnt_iter, &data); + + bss_param_ch_cnt = + ieee80211_mle_get_bss_param_ch_cnt((const void *)elems->ml_basic); + + /* + * Update bss_param_ch_cnt_link_id even if bss_param_ch_cnt + * didn't change to indicate that we got a beacon on our own + * link. 
+ */ + if (bss_param_ch_cnt >= 0 && bss_param_ch_cnt != 255) { + bss_conf->bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt_link_id = + bss_conf->link_id; + } +} + static bool -ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *channel, +ieee80211_find_80211h_pwr_constr(struct ieee80211_channel *channel, const u8 *country_ie, u8 country_ie_len, const u8 *pwr_constr_elem, int *chan_pwr, int *pwr_reduction) @@ -2715,8 +2799,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, return have_chan_pwr; } -static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *channel, +static void ieee80211_find_cisco_dtpc(struct ieee80211_channel *channel, const u8 *cisco_dtpc_ie, int *pwr_level) { @@ -2750,7 +2833,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link, (capab & cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT) || capab & cpu_to_le16(WLAN_CAPABILITY_RADIO_MEASURE))) { has_80211h_pwr = ieee80211_find_80211h_pwr_constr( - sdata, channel, country_ie, country_ie_len, + channel, country_ie, country_ie_len, pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h); pwr_level_80211h = max_t(int, 0, chan_pwr - pwr_reduction_80211h); @@ -2758,7 +2841,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link, if (cisco_dtpc_ie) { ieee80211_find_cisco_dtpc( - sdata, channel, cisco_dtpc_ie, &pwr_level_cisco); + channel, cisco_dtpc_ie, &pwr_level_cisco); has_cisco_pwr = true; } @@ -2791,7 +2874,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link, } link->ap_power_level = new_ap_level; - if (__ieee80211_recalc_txpower(sdata)) + if (__ieee80211_recalc_txpower(link)) return BSS_CHANGED_TXPOWER; return 0; } @@ -4101,8 +4184,13 @@ EXPORT_SYMBOL(ieee80211_beacon_loss); void ieee80211_connection_loss(struct ieee80211_vif *vif) { - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_hw *hw = &sdata->local->hw; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_hw *hw; + + KUNIT_STATIC_STUB_REDIRECT(ieee80211_connection_loss, vif); + + sdata = vif_to_sdata(vif); + hw = &sdata->local->hw; trace_api_connection_loss(sdata); @@ -4667,7 +4755,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, ret = false; goto out; } - link->u.mgd.bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt_link_id = link_id; } } else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC || !elems->prof || @@ -4677,6 +4766,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, } else { const u8 *ptr = elems->prof->variable + elems->prof->sta_info_len - 1; + int bss_param_ch_cnt; /* * During parsing, we validated that these fields exist, @@ -4684,8 +4774,10 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, */ capab_info = get_unaligned_le16(ptr); assoc_data->link[link_id].status = get_unaligned_le16(ptr + 2); - link->u.mgd.bss_param_ch_cnt = + bss_param_ch_cnt = ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(elems->prof); + bss_conf->bss_param_ch_cnt = bss_param_ch_cnt; + bss_conf->bss_param_ch_cnt_link_id = link_id; if (assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) { link_info(link, "association response status code=%u\n", @@ -5665,7 +5757,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, /* links might have changed due to rejected ones, set them 
again */ ieee80211_vif_set_links(sdata, valid_links, dormant_links); - rate_control_rate_init(sta); + rate_control_rate_init_all_links(sta); if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) { set_sta_flag(sta, WLAN_STA_MFP); @@ -6913,6 +7005,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, /* note that after this elems->ml_basic can no longer be used fully */ ieee80211_mgd_check_cross_link_csa(sdata, rx_status->link_id, elems); + ieee80211_mgd_update_bss_param_ch_cnt(sdata, bss_conf, elems); + if (!link->u.mgd.disable_wmm_tracking && ieee80211_sta_wmm_params(local, link, elems->wmm_param, elems->wmm_param_len, diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index f4c51e4a1e29..6218abc3e441 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -4,7 +4,7 @@ * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research - * Copyright (C) 2022 - 2023 Intel Corporation + * Copyright (C) 2022 - 2024 Intel Corporation * Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz> * Funded by: Volkswagen Group Research */ @@ -96,7 +96,7 @@ static struct sta_info *ieee80211_ocb_finish_sta(struct sta_info *sta) sta_info_move_state(sta, IEEE80211_STA_ASSOC); sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - rate_control_rate_init(sta); + rate_control_rate_init(&sta->deflink); /* If it fails, maybe we raced another insertion? */ if (sta_info_insert_rcu(sta)) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 3dc9752188d5..0d056db9f81e 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -28,8 +28,9 @@ module_param(ieee80211_default_rc_algo, charp, 0644); MODULE_PARM_DESC(ieee80211_default_rc_algo, "Default rate control algorithm for mac80211 to use"); -void rate_control_rate_init(struct sta_info *sta) +void rate_control_rate_init(struct link_sta_info *link_sta) { + struct sta_info *sta = link_sta->sta; struct ieee80211_local *local = sta->sdata->local; struct rate_control_ref *ref = sta->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; @@ -37,11 +38,15 @@ void rate_control_rate_init(struct sta_info *sta) struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; - ieee80211_sta_init_nss(&sta->deflink); + ieee80211_sta_init_nss(link_sta); if (!ref) return; + /* SW rate control isn't supported with MLO right now */ + if (WARN_ON(ieee80211_vif_is_mld(&sta->sdata->vif))) + return; + rcu_read_lock(); chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf); @@ -67,6 +72,21 @@ void rate_control_rate_init(struct sta_info *sta) set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } +void rate_control_rate_init_all_links(struct sta_info *sta) +{ + int link_id; + + for (link_id = 0; link_id < ARRAY_SIZE(sta->link); link_id++) { + struct link_sta_info *link_sta; + + link_sta = sdata_dereference(sta->link[link_id], sta->sdata); + if (!link_sta) + continue; + + rate_control_rate_init(link_sta); + } +} + void rate_control_tx_status(struct ieee80211_local *local, struct ieee80211_tx_status *st) { @@ -93,16 +113,15 @@ void rate_control_tx_status(struct ieee80211_local *local, void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, unsigned int link_id, + struct link_sta_info *link_sta, u32 changed) { struct rate_control_ref *ref = local->rate_ctrl; + struct sta_info *sta = link_sta->sta; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_chanctx_conf *chanctx_conf; - WARN_ON(link_id != 0); 
- if (ref && ref->ops->rate_update) { rcu_read_lock(); @@ -120,7 +139,8 @@ void rate_control_rate_update(struct ieee80211_local *local, } if (sta->uploaded) - drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); + drv_link_sta_rc_update(local, sta->sdata, link_sta->pub, + changed); } int ieee80211_rate_control_register(const struct rate_control_ops *ops) @@ -229,9 +249,8 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf, ref->ops->name, len); } -const struct file_operations rcname_ops = { +const struct debugfs_short_fops rcname_ops = { .read = rcname_read, - .open = simple_open, .llseek = default_llseek, }; #endif diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index d6190f10fe7c..5e4bde598212 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -3,7 +3,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022, 2024 Intel Corporation */ #ifndef IEEE80211_RATE_H @@ -29,11 +29,11 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, void rate_control_tx_status(struct ieee80211_local *local, struct ieee80211_tx_status *st); -void rate_control_rate_init(struct sta_info *sta); +void rate_control_rate_init(struct link_sta_info *link_sta); +void rate_control_rate_init_all_links(struct sta_info *sta); void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, - unsigned int link_id, + struct link_sta_info *link_sta, u32 changed); static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, @@ -62,7 +62,7 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) #endif } -extern const struct file_operations rcname_ops; +extern const struct debugfs_short_fops rcname_ops; static inline void rate_control_add_debugfs(struct ieee80211_local *local) { diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 6bf3b4444a43..706cbc99f718 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1053,7 +1053,7 @@ minstrel_ht_refill_sample_rates(struct minstrel_ht_sta *mi) * - max_prob_rate must use only one stream, as a tradeoff between delivery * probability and throughput during strong fluctuations * - as long as the max prob rate has a probability of more than 75%, pick - * higher throughput rates, even if the probablity is a bit lower + * higher throughput rates, even if the probability is a bit lower */ static void minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 694b43091fec..2bec18fc1b03 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -508,18 +508,13 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST; if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR) flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR; - if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) - flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN; if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN) flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN; if (status->flag & RX_FLAG_AMPDU_EOF_BIT) flags |= IEEE80211_RADIOTAP_AMPDU_EOF; put_unaligned_le16(flags, pos); pos += 2; - if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN) - *pos++ = status->ampdu_delimiter_crc; - else - *pos++ = 0; + *pos++ = 0; *pos++ = 0; } @@ -767,8 +762,8 @@ 
ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct ieee80211_rate *rate) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(origskb); - struct ieee80211_sub_if_data *sdata; - struct sk_buff *monskb = NULL; + struct ieee80211_sub_if_data *sdata, *prev_sdata = NULL; + struct sk_buff *skb, *monskb = NULL; int present_fcs_len = 0; unsigned int rtap_space = 0; struct ieee80211_sub_if_data *monitor_sdata = @@ -842,40 +837,52 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_space); list_for_each_entry_rcu(sdata, &local->mon_list, u.mntr.list) { - bool last_monitor = list_is_last(&sdata->u.mntr.list, - &local->mon_list); + struct cfg80211_chan_def *chandef; + + chandef = &sdata->vif.bss_conf.chanreq.oper; + if (chandef->chan && + chandef->chan->center_freq != status->freq) + continue; + + if (!prev_sdata) { + prev_sdata = sdata; + continue; + } + + if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + ieee80211_handle_mu_mimo_mon(sdata, origskb, rtap_space); if (!monskb) monskb = ieee80211_make_monitor_skb(local, &origskb, rate, rtap_space, - only_monitor && - last_monitor); + false); + if (!monskb) + continue; - if (monskb) { - struct sk_buff *skb; + skb = skb_clone(monskb, GFP_ATOMIC); + if (!skb) + continue; - if (last_monitor) { - skb = monskb; - monskb = NULL; - } else { - skb = skb_clone(monskb, GFP_ATOMIC); - } + skb->dev = prev_sdata->dev; + dev_sw_netstats_rx_add(skb->dev, skb->len); + netif_receive_skb(skb); + prev_sdata = sdata; + } - if (skb) { - skb->dev = sdata->dev; - dev_sw_netstats_rx_add(skb->dev, skb->len); - netif_receive_skb(skb); - } + if (prev_sdata) { + if (monskb) + skb = monskb; + else + skb = ieee80211_make_monitor_skb(local, &origskb, + rate, rtap_space, + only_monitor); + if (skb) { + skb->dev = prev_sdata->dev; + dev_sw_netstats_rx_add(skb->dev, skb->len); + netif_receive_skb(skb); } - - if (last_monitor) - break; } - /* this happens if last_monitor was erroneously false */ - dev_kfree_skb(monskb); - - /* ditto */ if (!origskb) return NULL; @@ -3568,7 +3575,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sband = rx->local->hw.wiphy->bands[status->band]; - rate_control_rate_update(local, sband, rx->sta, 0, + rate_control_rate_update(local, sband, rx->link_sta, IEEE80211_RC_SMPS_CHANGED); cfg80211_sta_opmode_change_notify(sdata->dev, rx->sta->addr, @@ -3605,7 +3612,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_sta_rx_bw_to_chan_width(rx->link_sta); sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED; - rate_control_rate_update(local, sband, rx->sta, 0, + rate_control_rate_update(local, sband, rx->link_sta, IEEE80211_RC_BW_CHANGED); cfg80211_sta_opmode_change_notify(sdata->dev, rx->sta->addr, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index adb88c06b598..cb7079071885 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -1176,14 +1176,14 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, unsigned int n_channels) { struct ieee80211_local *local = sdata->local; - int ret = -EBUSY, i, n_ch = 0; + int i, n_ch = 0; enum nl80211_band band; lockdep_assert_wiphy(local->hw.wiphy); /* busy scanning */ if (local->scan_req) - goto unlock; + return -EBUSY; /* fill internal scan request */ if (!channels) { @@ -1200,7 +1200,9 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, &local->hw.wiphy->bands[band]->channels[i]; if (tmp_ch->flags & (IEEE80211_CHAN_NO_IR | - 
IEEE80211_CHAN_DISABLED)) + IEEE80211_CHAN_DISABLED) || + !cfg80211_wdev_channel_allowed(&sdata->wdev, + tmp_ch)) continue; local->int_scan_req->channels[n_ch] = tmp_ch; @@ -1209,21 +1211,23 @@ } if (WARN_ON_ONCE(n_ch == 0)) - goto unlock; + return -EINVAL; local->int_scan_req->n_channels = n_ch; } else { for (i = 0; i < n_channels; i++) { if (channels[i]->flags & (IEEE80211_CHAN_NO_IR | - IEEE80211_CHAN_DISABLED)) + IEEE80211_CHAN_DISABLED) || + !cfg80211_wdev_channel_allowed(&sdata->wdev, + channels[i])) continue; local->int_scan_req->channels[n_ch] = channels[i]; n_ch++; } - if (WARN_ON_ONCE(n_ch == 0)) - goto unlock; + if (n_ch == 0) + return -EINVAL; local->int_scan_req->n_channels = n_ch; } @@ -1233,9 +1237,7 @@ memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); local->int_scan_req->ssids[0].ssid_len = ssid_len; - ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); - unlock: - return ret; + return __ieee80211_start_scan(sdata, sdata->local->int_scan_req); } void ieee80211_scan_cancel(struct ieee80211_local *local) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 073ff9e0f397..c6015cd00372 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -377,13 +377,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, /* capture the AP chandef before (potential) downgrading */ csa_ie->chanreq.ap = new_chandef; - if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 && - new_chandef.width == NL80211_CHAN_WIDTH_320) - ieee80211_chandef_downgrade(&new_chandef, NULL); - - if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 && - (new_chandef.width == NL80211_CHAN_WIDTH_80P80 || - new_chandef.width == NL80211_CHAN_WIDTH_160)) + while (conn->bw_limit < + ieee80211_min_bw_limit_from_chandef(&new_chandef)) ieee80211_chandef_downgrade(&new_chandef, NULL); if (!cfg80211_chandef_compatible(&new_chandef, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9195d5a2de0a..9f89fb5bee37 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -169,7 +169,7 @@ struct sta_info; * @buf_size: reorder buffer size at receiver * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent - * @amsdu: support A-MSDU withing A-MDPU + * @amsdu: support A-MSDU within A-MPDU * @ssn: starting sequence number of the session * * This structure's lifetime is managed by RCU, assignments to diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b41b867f43b2..5f28f3633fa0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -927,6 +927,9 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->u.mntr.flags & MONITOR_FLAG_SKIP_TX) + continue; + if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) && !send_to_cooked) continue; @@ -1099,7 +1102,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, * This is a bit racy but we can avoid a lot of work * with this test...
*/ - if (!local->monitors && (!send_to_cooked || !local->cooked_mntrs)) { + if (!local->tx_mntrs && (!send_to_cooked || !local->cooked_mntrs)) { if (status->free_list) list_add_tail(&skb->list, status->free_list); else diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index f07b40916485..2f92e7c7f203 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1342,7 +1342,8 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, bw = min(bw, ieee80211_sta_cap_rx_bw(&sta->deflink)); if (bw != sta->sta.deflink.bandwidth) { sta->sta.deflink.bandwidth = bw; - rate_control_rate_update(local, sband, sta, 0, + rate_control_rate_update(local, sband, + &sta->deflink, IEEE80211_RC_BW_CHANGED); /* * if a TDLS peer BW was updated, we need to diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 880a1fa8705a..94c00e71f6f8 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -313,7 +313,7 @@ int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx, * Record previously received IV, will be copied into the * key information after MIC verification. It is possible * that we don't catch replays of fragments but that's ok - * because the Michael MIC verication will then fail. + * because the Michael MIC verification will then fail. */ *out_iv32 = iv32; *out_iv16 = iv16; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index dc498cd8cd91..7a4985fc2b16 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -939,31 +939,34 @@ TRACE_EVENT(drv_sta_set_txpwr, ) ); -TRACE_EVENT(drv_sta_rc_update, +TRACE_EVENT(drv_link_sta_rc_update, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, + struct ieee80211_link_sta *link_sta, u32 changed), - TP_ARGS(local, sdata, sta, changed), + TP_ARGS(local, sdata, link_sta, changed), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY __field(u32, changed) + __field(u32, link_id) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - STA_ASSIGN; + STA_NAMED_ASSIGN(link_sta->sta); __entry->changed = changed; + __entry->link_id = link_sta->link_id; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " changed: 0x%x", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->changed + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " (link %d) changed: 0x%x", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->link_id, + __entry->changed ) ); @@ -3154,6 +3157,25 @@ TRACE_EVENT(drv_neg_ttlm_res, LOCAL_PR_ARG, VIF_PR_ARG, __entry->res ) ); + +TRACE_EVENT(drv_prep_add_interface, + TP_PROTO(struct ieee80211_local *local, + enum nl80211_iftype type), + + TP_ARGS(local, type), + TP_STRUCT__entry(LOCAL_ENTRY + __field(u32, type) + ), + + TP_fast_assign(LOCAL_ASSIGN; + __entry->type = type; + ), + + TP_printk(LOCAL_PR_FMT " type: %u\n ", + LOCAL_PR_ARG, __entry->type + ) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0ff8b56f5807..a24636bda679 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1763,7 +1763,8 @@ static bool __ieee80211_tx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) || + ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { vif = &sdata->vif; break; } @@ -3952,7 +3953,8 @@ begin: switch (tx.sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + if ((tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) || 
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) { vif = &tx.sdata->vif; break; } @@ -6214,7 +6216,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, goto start_xmit; /* update QoS header to prioritize control port frames if possible, - * priorization also happens for control port frames send over + * prioritization also happens for control port frames sent over * AF_PACKET */ rcu_read_lock(); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f94faa86ba8a..a4e1301cc999 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -756,7 +756,8 @@ static void __iterate_interfaces(struct ieee80211_local *local, lockdep_is_held(&local->hw.wiphy->mtx)) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; break; case NL80211_IFTYPE_AP_VLAN: @@ -1010,7 +1011,7 @@ void ieee80211_set_wmm_default(struct ieee80211_link_data *link, else aCWmin = 15; - /* Confiure old 802.11b/g medium access rules. */ + /* Configure old 802.11b/g medium access rules. */ qparam.cw_max = aCWmax; qparam.cw_min = aCWmin; qparam.txop = 0; @@ -1873,8 +1874,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_MONITOR && ieee80211_sdata_running(sdata)) { res = drv_add_interface(local, sdata); if (WARN_ON(res)) @@ -1887,11 +1890,14 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ if (res) { list_for_each_entry_continue_reverse(sdata, &local->interfaces, - list) + list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_MONITOR && ieee80211_sdata_running(sdata)) drv_remove_interface(local, sdata); + } ieee80211_handle_reconfig_failure(local); return res; } @@ -2752,7 +2758,7 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos + sizeof(struct ieee80211_vht_operation); } -u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef) +u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef) { struct ieee80211_he_operation *he_oper; struct ieee80211_he_6ghz_oper *he_6ghz_op; @@ -2844,7 +2850,7 @@ out: return pos; } -u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, +u8 *ieee80211_ie_build_eht_oper(u8 *pos, const struct cfg80211_chan_def *chandef, const struct ieee80211_sta_eht_cap *eht_cap) { diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index bf6ef45af757..6a20fa099190 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -280,10 +280,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* * This is a workaround for VHT-enabled STAs which break the spec * and have the VHT-MCS Rx map filled in with value 3 for all eight - * spacial streams, an example is AR9462. + * spatial streams, an example is AR9462.
* * As per spec, in section 22.1.1 Introduction to the VHT PHY - * A VHT STA shall support at least single spactial stream VHT-MCSs + * A VHT STA shall support at least single spatial stream VHT-MCSs * 0 to 7 (transmit and receive) in all supported channel widths. */ if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) { @@ -479,28 +479,6 @@ ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *link_sta) } } -enum ieee80211_sta_rx_bandwidth -ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_20: - return IEEE80211_STA_RX_BW_20; - case NL80211_CHAN_WIDTH_40: - return IEEE80211_STA_RX_BW_40; - case NL80211_CHAN_WIDTH_80: - return IEEE80211_STA_RX_BW_80; - case NL80211_CHAN_WIDTH_160: - case NL80211_CHAN_WIDTH_80P80: - return IEEE80211_STA_RX_BW_160; - case NL80211_CHAN_WIDTH_320: - return IEEE80211_STA_RX_BW_320; - default: - WARN_ON_ONCE(1); - return IEEE80211_STA_RX_BW_20; - } -} - /* FIXME: rename/move - this deals with everything not just VHT */ enum ieee80211_sta_rx_bandwidth _ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta, @@ -766,8 +744,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, if (changed > 0) { ieee80211_recalc_min_chandef(sdata, link_sta->link_id); - rate_control_rate_update(local, sband, link_sta->sta, - link_sta->link_id, changed); + rate_control_rate_update(local, sband, link_sta, changed); } } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 293afa3f57c5..40d5d9e48479 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -598,9 +598,6 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad, memcpy(j_0, hdr->addr2, ETH_ALEN); memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN); - j_0[13] = 0; - j_0[14] = 0; - j_0[AES_BLOCK_SIZE - 1] = 0x01; ccmp_gcmp_aad(skb, aad, spp_amsdu); } diff --git a/net/mctp/device.c b/net/mctp/device.c index 85cc5f31f1e7..26ce34b7e88e 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -371,6 +371,8 @@ static int mctp_fill_link_af(struct sk_buff *skb, return -ENODATA; if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net)) return -EMSGSIZE; + if (nla_put_u8(skb, IFLA_MCTP_PHYS_BINDING, mdev->binding)) + return -EMSGSIZE; return 0; } @@ -385,6 +387,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev, if (!mdev) return 0; ret = nla_total_size(4); /* IFLA_MCTP_NET */ + ret += nla_total_size(1); /* IFLA_MCTP_PHYS_BINDING */ mctp_dev_put(mdev); return ret; } @@ -480,7 +483,8 @@ static int mctp_dev_notify(struct notifier_block *this, unsigned long event, } static int mctp_register_netdevice(struct net_device *dev, - const struct mctp_netdev_ops *ops) + const struct mctp_netdev_ops *ops, + enum mctp_phys_binding binding) { struct mctp_dev *mdev; @@ -489,17 +493,19 @@ static int mctp_register_netdevice(struct net_device *dev, return PTR_ERR(mdev); mdev->ops = ops; + mdev->binding = binding; return register_netdevice(dev); } int mctp_register_netdev(struct net_device *dev, - const struct mctp_netdev_ops *ops) + const struct mctp_netdev_ops *ops, + enum mctp_phys_binding binding) { int rc; rtnl_lock(); - rc = mctp_register_netdevice(dev, ops); + rc = mctp_register_netdevice(dev, ops, binding); rtnl_unlock(); return rc; @@ -535,14 +541,20 @@ int __init mctp_device_init(void) int err; register_netdevice_notifier(&mctp_dev_nb); - rtnl_af_register(&mctp_af_ops); + + err = rtnl_af_register(&mctp_af_ops); + if (err) + goto err_notifier; err = 
rtnl_register_many(mctp_device_rtnl_msg_handlers); - if (err) { - rtnl_af_unregister(&mctp_af_ops); - unregister_netdevice_notifier(&mctp_dev_nb); - } + if (err) + goto err_af; + return 0; +err_af: + rtnl_af_unregister(&mctp_af_ops); +err_notifier: + unregister_netdevice_notifier(&mctp_dev_nb); return err; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index df62638b6498..1f63b32d76d6 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1664,7 +1664,7 @@ static int nla_put_via(struct sk_buff *skb, u8 table, const void *addr, int alen) { static const int table_to_family[NEIGH_NR_TABLES + 1] = { - AF_INET, AF_INET6, AF_DECnet, AF_PACKET, + AF_INET, AF_INET6, AF_PACKET, }; struct nlattr *nla; struct rtvia *via; @@ -2753,7 +2753,9 @@ static int __init mpls_init(void) dev_add_pack(&mpls_packet_type); - rtnl_af_register(&mpls_af_ops); + err = rtnl_af_register(&mpls_af_ops); + if (err) + goto out_unregister_dev_type; err = rtnl_register_many(mpls_rtnl_msg_handlers); if (err) @@ -2773,6 +2775,7 @@ out_unregister_rtnl: rtnl_unregister_many(mpls_rtnl_msg_handlers); out_unregister_rtnl_af: rtnl_af_unregister(&mpls_af_ops); +out_unregister_dev_type: dev_remove_pack(&mpls_packet_type); out_unregister_pernet: unregister_pernet_subsys(&mpls_net_ops); diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 2d3efb405437..02205f7994d7 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM; if (sf->request_bkup) flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC; - if (sf->fully_established) + if (READ_ONCE(sf->fully_established)) flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED; if (sf->conn_finished) flags |= MPTCP_SUBFLOW_FLAG_CONNECTED; diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index bfb37c5a88c4..dcffd847af33 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -14,7 +14,7 @@ const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = { [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, - [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, + [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_BE32, }, [MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16), [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, }, [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, }, diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 370c3836b771..1603b3702e22 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, return false; /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */ - if (subflow->fully_established || snd_data_fin_enable || + if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable || subflow->snd_isn != TCP_SKB_CB(skb)->seq || sk->sk_state != TCP_ESTABLISHED) return false; @@ -930,7 +930,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, /* here we can process OoO, in-window pkts, only in-sequence 4th ack * will make the subflow fully established */ - if (likely(subflow->fully_established)) { + if (likely(READ_ONCE(subflow->fully_established))) { /* on passive sockets, check for 3rd ack retransmission * note that msk is always set by subflow_syn_recv_sock() * for mp_join subflows diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 620264c75dc2..16c336c51940 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -154,6 
+154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) void mptcp_pm_connection_closed(struct mptcp_sock *msk) { pr_debug("msk=%p\n", msk); + + if (msk->token) + mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); } void mptcp_pm_subflow_established(struct mptcp_sock *msk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 45a2b5f05d38..7a0f7998376a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -512,7 +512,8 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) { struct mptcp_pm_addr_entry *entry; - list_for_each_entry(entry, &pernet->local_addr_list, list) { + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, + lockdep_is_held(&pernet->lock)) { if (entry->addr.id == id) return entry; } @@ -782,7 +783,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *alt = NULL; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); @@ -793,10 +794,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) mptcp_for_each_subflow(msk, subflow) { if (__mptcp_subflow_active(subflow)) { - mptcp_pm_send_ack(msk, subflow, false, false); - break; + if (!subflow->stale) { + mptcp_pm_send_ack(msk, subflow, false, false); + return; + } + + if (!alt) + alt = subflow; } } + + if (alt) + mptcp_pm_send_ack(msk, alt, false, false); } int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, @@ -1134,17 +1143,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc { struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; - int ret = -1; + int ret; pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { - ret = entry->addr.id; - break; - } - } + entry = __lookup_addr(pernet, skc); + ret = entry ? 
entry->addr.id : -1; rcu_read_unlock(); if (ret >= 0) return ret; @@ -1171,15 +1176,11 @@ bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); struct mptcp_pm_addr_entry *entry; - bool backup = false; + bool backup; rcu_read_lock(); - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { - backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - break; - } - } + entry = __lookup_addr(pernet, skc); + backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); rcu_read_unlock(); return backup; @@ -1816,7 +1817,7 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) goto fail; } - spin_lock_bh(&pernet->lock); + rcu_read_lock(); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { GENL_SET_ERR_MSG(info, "address not found"); @@ -1830,11 +1831,11 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, reply); ret = genlmsg_reply(msg, info); - spin_unlock_bh(&pernet->lock); + rcu_read_unlock(); return ret; unlock_fail: - spin_unlock_bh(&pernet->lock); + rcu_read_unlock(); fail: nlmsg_free(msg); @@ -1858,7 +1859,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, pernet = pm_nl_get_pernet(net); - spin_lock_bh(&pernet->lock); + rcu_read_lock(); for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { if (test_bit(i, pernet->id_bitmap)) { entry = __lookup_addr_by_id(pernet, i); @@ -1883,7 +1884,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, genlmsg_end(msg, hdr); } } - spin_unlock_bh(&pernet->lock); + rcu_read_unlock(); cb->args[0] = id; return msg->len; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 48d480982b78..08a72242428c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2728,8 +2728,8 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) return; - close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + - mptcp_close_timeout(sk); + close_timeout = (unsigned long)inet_csk(sk)->icsk_mtup.probe_timestamp - + tcp_jiffies32 + jiffies + mptcp_close_timeout(sk); /* the close timeout takes precedence on the fail one, and here at least one of * them is active @@ -3147,8 +3147,7 @@ cleanup: sock_hold(sk); pr_debug("msk=%p state=%d\n", sk, sk->sk_state); - if (msk->token) - mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); + mptcp_pm_connection_closed(msk); if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); @@ -3214,8 +3213,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_stop_rtx_timer(sk); mptcp_stop_tout_timer(sk); - if (msk->token) - mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); + mptcp_pm_connection_closed(msk); /* msk->subflow is still intact, the following will not free the first * subflow @@ -3519,7 +3517,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk) struct tcp_sock *tp = tcp_sk(ssk); unsigned long timeout; - if (mptcp_subflow_ctx(ssk)->fully_established) + if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established)) return; /* reschedule with a timeout above RTT, as we must look only for drop */ @@ -3530,7 +3528,8 @@ static void schedule_3rdack_retransmission(struct sock *ssk) timeout += jiffies; WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER); - icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + smp_store_release(&icsk->icsk_ack.pending, + 
icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER); icsk->icsk_ack.timeout = timeout; sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 568a72702b08..a93e661ef5c4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -513,7 +513,6 @@ struct mptcp_subflow_context { request_bkup : 1, mp_capable : 1, /* remote is MPTCP capable */ mp_join : 1, /* remote is JOINing */ - fully_established : 1, /* path validated */ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, @@ -532,10 +531,11 @@ struct mptcp_subflow_context { is_mptfo : 1, /* subflow is doing TFO */ close_event_done : 1, /* has done the post-closed part */ mpc_drop : 1, /* the MPC option has been dropped in a rtx */ - __unused : 8; + __unused : 9; bool data_avail; bool scheduled; bool pm_listener; /* a listener managed by the kernel PM? */ + bool fully_established; /* path validated */ u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -780,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk) static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) { /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ - if (subflow->request_join && !subflow->fully_established) + if (subflow->request_join && !READ_ONCE(subflow->fully_established)) return false; return __tcp_can_send(mptcp_subflow_tcp_sock(subflow)); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 78ed508ebc1b..df7dbcfa3b71 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -60,7 +60,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen) size_t offs = 0; rcu_read_lock(); - spin_lock(&mptcp_sched_list_lock); list_for_each_entry_rcu(sched, &mptcp_sched_list, list) { offs += snprintf(buf + offs, maxlen - offs, "%s%s", @@ -69,7 +68,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen) if (WARN_ON_ONCE(offs >= maxlen)) break; } - spin_unlock(&mptcp_sched_list_lock); rcu_read_unlock(); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6170f2fff71e..fd021cf8286e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk, const struct mptcp_options_received *mp_opt) { subflow_set_remote_key(msk, subflow, mp_opt); - subflow->fully_established = 1; + WRITE_ONCE(subflow->fully_established, true); WRITE_ONCE(msk->fully_established, true); if (subflow->is_mptfo) @@ -971,7 +971,8 @@ enum mapping_status { MAPPING_EMPTY, MAPPING_DATA_FIN, MAPPING_DUMMY, - MAPPING_BAD_CSUM + MAPPING_BAD_CSUM, + MAPPING_NODSS }; static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) @@ -1128,8 +1129,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, return MAPPING_EMPTY; } + /* If the required DSS has likely been dropped by a middlebox */ if (!subflow->map_valid) - return MAPPING_INVALID; + return MAPPING_NODSS; goto validate_seq; } @@ -1343,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY || - status == MAPPING_BAD_CSUM)) + status == MAPPING_BAD_CSUM || status == MAPPING_NODSS)) goto fallback; if (status != MAPPING_OK) @@ -1396,7 +1398,9 @@ fallback: * subflow_error_report() will introduce the appropriate barriers */ subflow->reset_transient = 0; - subflow->reset_reason = 
MPTCP_RST_EMPTCP; + subflow->reset_reason = status == MAPPING_NODSS ? + MPTCP_RST_EMIDDLEBOX : + MPTCP_RST_EMPTCP; reset: WRITE_ONCE(ssk->sk_err, EBADMSG); @@ -2045,7 +2049,6 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->tcp_state_change = old_ctx->tcp_state_change; new_ctx->tcp_error_report = old_ctx->tcp_error_report; new_ctx->rel_write_seq = 1; - new_ctx->tcp_sock = newsk; if (subflow_req->mp_capable) { /* see comments in subflow_syn_recv_sock(), MPTCP connection @@ -2062,7 +2065,7 @@ static void subflow_ulp_clone(const struct request_sock *req, } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; - new_ctx->fully_established = 1; + WRITE_ONCE(new_ctx->fully_established, true); new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; new_ctx->request_bkup = subflow_req->request_bkup; diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index e4fa00abde6a..5988b9bb9029 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -163,11 +163,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) { + if (ip > ip_to) swap(ip, ip_to); - if (ip < map->first_ip) - return -IPSET_ERR_BITMAP_RANGE; - } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -178,7 +175,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], ip_to = ip; } - if (ip_to > map->last_ip) + if (ip < map->first_ip || ip_to > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; for (; !before(ip_to, ip); ip += map->hosts) { diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index dc6ddc4abbe2..7d13110ce188 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3662,10 +3662,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); udest->port = nla_get_be16(nla_port); - if (nla_addr_family) - udest->af = nla_get_u16(nla_addr_family); - else - udest->af = 0; + udest->af = nla_get_u16_default(nla_addr_family, 0); /* If a full entry was requested, check for the additional fields */ if (full_entry) { diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 3d64a4511fcf..06b084844700 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -43,7 +43,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, hook = rcu_dereference(*ptr_global_hook); if (!hook) { rcu_read_unlock(); - err = request_module(mod); + err = request_module("%s", mod); if (err) return ERR_PTR(err < 0 ? 
err : -EINVAL); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6a1239433830..36168f8b6efa 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3870,7 +3870,7 @@ static int __init ctnetlink_init(void) { int ret; - NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx); + NL_ASSERT_CTX_FITS(struct ctnetlink_list_dump_ctx); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 4085c436e306..aad84aabd7f1 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1090,10 +1090,8 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], range->flags |= NF_NAT_RANGE_MAP_IPS; } - if (tb[CTA_NAT_V4_MAXIP]) - range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); - else - range->max_addr.ip = range->min_addr.ip; + range->max_addr.ip = nla_get_be32_default(tb[CTA_NAT_V4_MAXIP], + range->min_addr.ip); return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 588a2757986c..21b6f7410a1f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -26,6 +26,9 @@ #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) #define NFT_SET_MAX_ANONLEN 16 +/* limit compaction to avoid huge kmalloc/krealloc sizes. */ +#define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem)) + unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); @@ -391,6 +394,86 @@ static void nf_tables_unregister_hook(struct net *net, return __nf_tables_unregister_hook(net, table, chain, false); } +static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b) +{ + /* NB: the ->bound equality check is defensive, at this time we only merge + * a new nft_trans_elem transaction request with the transaction tail + * element, but a->bound != b->bound would imply a NEWRULE transaction + * is queued in-between. + * + * The set check is mandatory, the NFT_MAX_SET_NELEMS check prevents + * huge krealloc() requests. + */ + return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS; +} + +static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net, + struct nft_trans_elem *tail, + struct nft_trans_elem *trans, + gfp_t gfp) +{ + unsigned int nelems, old_nelems = tail->nelems; + struct nft_trans_elem *new_trans; + + if (!nft_trans_collapse_set_elem_allowed(tail, trans)) + return false; + + /* "cannot happen", at this time userspace element add + * requests always allocate a new transaction element. + * + * This serves as a reminder to adjust the list_add_tail + * logic below in case this ever changes. + */ + if (WARN_ON_ONCE(trans->nelems != 1)) + return false; + + if (check_add_overflow(old_nelems, trans->nelems, &nelems)) + return false; + + /* krealloc might free tail which invalidates list pointers */ + list_del_init(&tail->nft_trans.list); + + new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp); + if (!new_trans) { + list_add_tail(&tail->nft_trans.list, &nft_net->commit_list); + return false; + } + + /* + * new_trans->nft_trans.list contains garbage, but + * list_add_tail() doesn't care. 
+ */ + new_trans->nelems = nelems; + new_trans->elems[old_nelems] = trans->elems[0]; + list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list); + + return true; +} + +static bool nft_trans_try_collapse(struct nftables_pernet *nft_net, + struct nft_trans *trans, gfp_t gfp) +{ + struct nft_trans *tail; + + if (list_empty(&nft_net->commit_list)) + return false; + + tail = list_last_entry(&nft_net->commit_list, struct nft_trans, list); + + if (tail->msg_type != trans->msg_type) + return false; + + switch (trans->msg_type) { + case NFT_MSG_NEWSETELEM: + case NFT_MSG_DELSETELEM: + return nft_trans_collapse_set_elem(nft_net, + nft_trans_container_elem(tail), + nft_trans_container_elem(trans), gfp); + } + + return false; +} + static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -421,6 +504,24 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr } } +static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans, + gfp_t gfp) +{ + struct nftables_pernet *nft_net = nft_pernet(net); + + WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM && + trans->msg_type != NFT_MSG_DELSETELEM); + + might_alloc(gfp); + + if (nft_trans_try_collapse(nft_net, trans, gfp)) { + kfree(trans); + return; + } + + nft_trans_commit_list_add_tail(net, trans); +} + static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; @@ -1825,7 +1926,8 @@ nla_put_failure: return -ENOSPC; } -static int nft_dump_basechain_hook(struct sk_buff *skb, int family, +static int nft_dump_basechain_hook(struct sk_buff *skb, + const struct net *net, int family, const struct nft_base_chain *basechain, const struct list_head *hook_list) { @@ -1850,7 +1952,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family, if (!hook_list) hook_list = &basechain->hook_list; - list_for_each_entry_rcu(hook, hook_list, list) { + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { if (!first) first = hook; @@ -1901,7 +2004,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nft_base_chain *basechain = nft_base_chain(chain); struct nft_stats __percpu *stats; - if (nft_dump_basechain_hook(skb, family, basechain, hook_list)) + if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -2083,14 +2186,14 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy, NULL); if (err < 0) - return ERR_PTR(err); + return ERR_PTR_PCPU(err); if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) - return ERR_PTR(-EINVAL); + return ERR_PTR_PCPU(-EINVAL); newstats = netdev_alloc_pcpu_stats(struct nft_stats); if (newstats == NULL) - return ERR_PTR(-ENOMEM); + return ERR_PTR_PCPU(-ENOMEM); /* Restore old counters on this cpu, no problem. Per-cpu statistics * are not exposed to userspace. 
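The nft_stats_alloc() hunks above switch its error returns to the __percpu-aware ERR_PTR helpers, so an errno can travel through a __percpu pointer without sparse flagging an address-space mismatch. A rough caller-side sketch, not part of the patch, using only the ERR_PTR_PCPU()/IS_ERR_PCPU()/PTR_ERR_PCPU() helpers these hunks rely on; the example_* function names are hypothetical:

#include <linux/err.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_tables.h>

/* Illustrative sketch: an allocator returning a __percpu pointer
 * that encodes errors via ERR_PTR_PCPU() instead of a plain ERR_PTR().
 */
static struct nft_stats __percpu *example_stats_alloc(void)
{
	struct nft_stats __percpu *stats;

	/* netdev_alloc_pcpu_stats() yields a zeroed per-cpu copy or NULL */
	stats = netdev_alloc_pcpu_stats(struct nft_stats);
	if (!stats)
		return ERR_PTR_PCPU(-ENOMEM);	/* errno encoded, __percpu kept */

	return stats;
}

static int example_attach_stats(struct nft_base_chain *basechain)
{
	struct nft_stats __percpu *stats = example_stats_alloc();

	/* IS_ERR_PCPU()/PTR_ERR_PCPU() unwrap the encoded errno on this side */
	if (IS_ERR_PCPU(stats))
		return PTR_ERR_PCPU(stats);

	rcu_assign_pointer(basechain->stats, stats);
	return 0;
}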
@@ -2534,10 +2637,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); - if (IS_ERR(stats)) { + if (IS_ERR_PCPU(stats)) { nft_chain_release_hook(&hook); kfree(basechain); - return PTR_ERR(stats); + return PTR_ERR_PCPU(stats); } rcu_assign_pointer(basechain->stats, stats); } @@ -2651,7 +2754,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, struct nft_table *table = ctx->table; struct nft_chain *chain = ctx->chain; struct nft_chain_hook hook = {}; - struct nft_stats *stats = NULL; + struct nft_stats __percpu *stats = NULL; struct nft_hook *h, *next; struct nf_hook_ops *ops; struct nft_trans *trans; @@ -2747,8 +2850,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); - if (IS_ERR(stats)) { - err = PTR_ERR(stats); + if (IS_ERR_PCPU(stats)) { + err = PTR_ERR_PCPU(stats); goto err_hooks; } } @@ -3295,25 +3398,37 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME]) return -EINVAL; + rcu_read_lock(); + type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); - if (!type) - return -ENOENT; + if (!type) { + err = -ENOENT; + goto out_unlock; + } - if (!type->inner_ops) - return -EOPNOTSUPP; + if (!type->inner_ops) { + err = -EOPNOTSUPP; + goto out_unlock; + } err = nla_parse_nested_deprecated(info->tb, type->maxattr, tb[NFTA_EXPR_DATA], type->policy, NULL); if (err < 0) - goto err_nla_parse; + goto out_unlock; info->attr = nla; info->ops = type->inner_ops; + /* No module reference will be taken on type->owner. + * Presence of type->inner_ops implies that the expression + * is builtin, so it cannot go away. 
+ */ + rcu_read_unlock(); return 0; -err_nla_parse: +out_unlock: + rcu_read_unlock(); return err; } @@ -3412,13 +3527,15 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) * Rules */ -static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain, +static struct nft_rule *__nft_rule_lookup(const struct net *net, + const struct nft_chain *chain, u64 handle) { struct nft_rule *rule; // FIXME: this sucks - list_for_each_entry_rcu(rule, &chain->rules, list) { + list_for_each_entry_rcu(rule, &chain->rules, list, + lockdep_commit_lock_is_held(net)) { if (handle == rule->handle) return rule; } @@ -3426,13 +3543,14 @@ static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain, return ERR_PTR(-ENOENT); } -static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain, +static struct nft_rule *nft_rule_lookup(const struct net *net, + const struct nft_chain *chain, const struct nlattr *nla) { if (nla == NULL) return ERR_PTR(-EINVAL); - return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); + return __nft_rule_lookup(net, chain, be64_to_cpu(nla_get_be64(nla))); } static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { @@ -3733,7 +3851,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) return 0; } -/* called with rcu_read_lock held */ +/* Caller must hold rcu read lock or transaction mutex */ static struct sk_buff * nf_tables_getrule_single(u32 portid, const struct nfnl_info *info, const struct nlattr * const nla[], bool reset) @@ -3760,7 +3878,7 @@ nf_tables_getrule_single(u32 portid, const struct nfnl_info *info, return ERR_CAST(chain); } - rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + rule = nft_rule_lookup(net, chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return ERR_CAST(rule); @@ -3984,7 +4102,8 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) struct nft_set_ext *ext; int ret = 0; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_rcu(catchall, &set->catchall_list, list, + lockdep_commit_lock_is_held(ctx->net)) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, dummy_iter.genmask)) continue; @@ -4058,7 +4177,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_RULE_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); - rule = __nft_rule_lookup(chain, handle); + rule = __nft_rule_lookup(net, chain, handle); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); return PTR_ERR(rule); @@ -4080,7 +4199,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); - old_rule = __nft_rule_lookup(chain, pos_handle); + old_rule = __nft_rule_lookup(net, chain, pos_handle); if (IS_ERR(old_rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); return PTR_ERR(old_rule); @@ -4297,7 +4416,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, if (chain) { if (nla[NFTA_RULE_HANDLE]) { - rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + rule = nft_rule_lookup(info->net, chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) { if (PTR_ERR(rule) == -ENOENT && NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE) @@ -4457,7 +4576,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { [NFTA_SET_DESC_CONCAT] = 
NLA_POLICY_NESTED_ARRAY(nft_concat_policy), }; -static struct nft_set *nft_set_lookup(const struct nft_table *table, +static struct nft_set *nft_set_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nft_set *set; @@ -4465,7 +4585,8 @@ static struct nft_set *nft_set_lookup(const struct nft_table *table, if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry_rcu(set, &table->sets, list) { + list_for_each_entry_rcu(set, &table->sets, list, + lockdep_commit_lock_is_held(net)) { if (!nla_strcmp(nla, set->name) && nft_active_genmask(set, genmask)) return set; @@ -4515,7 +4636,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net, { struct nft_set *set; - set = nft_set_lookup(table, nla_set_name, genmask); + set = nft_set_lookup(net, table, nla_set_name, genmask); if (IS_ERR(set)) { if (!nla_set_id) return set; @@ -4891,7 +5012,7 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info, if (!nla[NFTA_SET_TABLE]) return -EINVAL; - set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); + set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return PTR_ERR(set); @@ -5227,7 +5348,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); - set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); + set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { if (PTR_ERR(set) != -ENOENT) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); @@ -5429,7 +5550,7 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info, set = nft_set_lookup_byhandle(table, attr, genmask); } else { attr = nla[NFTA_SET_NAME]; - set = nft_set_lookup(table, attr, genmask); + set = nft_set_lookup(net, table, attr, genmask); } if (IS_ERR(set)) { @@ -5493,7 +5614,8 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, struct nft_set_ext *ext; int ret = 0; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_rcu(catchall, &set->catchall_list, list, + lockdep_commit_lock_is_held(ctx->net)) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask)) continue; @@ -6259,7 +6381,7 @@ static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx, return PTR_ERR(table); } - set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); + set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); @@ -6410,17 +6532,21 @@ err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } -static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, +static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx, int msg_type, struct nft_set *set) { + struct nft_trans_elem *te; struct nft_trans *trans; - trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem)); + trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1)); if (trans == NULL) return NULL; - nft_trans_elem_set(trans) = set; + te = nft_trans_container_elem(trans); + te->nelems = 1; + te->set = set; + return trans; } @@ -6542,28 +6668,52 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, } /* Drop references and destroy. Called from gc, dynset and abort path. 
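 *
 * (Editor's sketch, not part of the upstream patch: after this series,
 * element destruction is driven through the transaction container
 * introduced below. Using only helpers visible in these hunks, a caller
 * holding a one-element container would do roughly:
 *
 *	struct nft_trans_elem *te = nft_trans_container_elem(trans);
 *	int i;
 *
 *	for (i = 0; i < te->nelems; i++)
 *		nft_set_elem_destroy(te->set, te->elems[i].priv, true);
 *
 * The abort-path variant additionally skips entries whose ->priv was
 * cleared by nft_trans_elems_new_abort(), as the next hunks show.)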
*/ -void nft_set_elem_destroy(const struct nft_set *set, - const struct nft_elem_priv *elem_priv, - bool destroy_expr) +static void __nft_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_elem_priv *elem_priv, + bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); - struct nft_ctx ctx = { - .net = read_pnet(&set->net), - .family = set->table->family, - }; nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) - nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); + nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_dec(&(*nft_set_ext_obj(ext))->use); kfree(elem_priv); } + +/* Drop references and destroy. Called from gc and dynset. */ +void nft_set_elem_destroy(const struct nft_set *set, + const struct nft_elem_priv *elem_priv, + bool destroy_expr) +{ + struct nft_ctx ctx = { + .net = read_pnet(&set->net), + .family = set->table->family, + }; + + __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr); +} EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +/* Drop references and destroy. Called from abort path. */ +static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te) +{ + int i; + + for (i = 0; i < te->nelems; i++) { + /* skip update request, see nft_trans_elems_new_abort() */ + if (!te->elems[i].priv) + continue; + + __nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true); + } +} + /* Destroy element. References have been already dropped in the preparation * path via nft_setelem_data_deactivate(). */ @@ -6579,6 +6729,15 @@ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, kfree(elem_priv); } +static void nft_trans_elems_destroy(const struct nft_ctx *ctx, + const struct nft_trans_elem *te) +{ + int i; + + for (i = 0; i < te->nelems; i++) + nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv); +} + int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]) { @@ -6735,6 +6894,38 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set, } } +static void nft_trans_elem_update(const struct nft_set *set, + const struct nft_trans_one_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_elem_update *update = elem->update; + + if (update->flags & NFT_TRANS_UPD_TIMEOUT) + WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout); + + if (update->flags & NFT_TRANS_UPD_EXPIRATION) + WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration); +} + +static void nft_trans_elems_add(const struct nft_ctx *ctx, + struct nft_trans_elem *te) +{ + int i; + + for (i = 0; i < te->nelems; i++) { + struct nft_trans_one_elem *elem = &te->elems[i]; + + if (elem->update) + nft_trans_elem_update(te->set, elem); + else + nft_setelem_activate(ctx->net, te->set, elem->priv); + + nf_tables_setelem_notify(ctx, te->set, elem->priv, + NFT_MSG_NEWSETELEM); + kfree(elem->update); + } +} + static int nft_setelem_catchall_deactivate(const struct net *net, struct nft_set *set, struct nft_set_elem *elem) @@ -6817,6 +7008,26 @@ static void nft_setelem_remove(const struct net *net, set->ops->remove(net, set, elem_priv); } +static void nft_trans_elems_remove(const struct nft_ctx *ctx, + const struct nft_trans_elem *te) +{ + int i; + + 
for (i = 0; i < te->nelems; i++) { + WARN_ON_ONCE(te->elems[i].update); + + nf_tables_setelem_notify(ctx, te->set, + te->elems[i].priv, + te->nft_trans.msg_type); + + nft_setelem_remove(ctx->net, te->set, te->elems[i].priv); + if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) { + atomic_dec(&te->set->nelems); + te->set->ndeact--; + } + } +} + static bool nft_setelem_valid_key_end(const struct nft_set *set, struct nlattr **nla, u32 flags) { @@ -6853,7 +7064,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; - u8 update_flags; u64 expiration; u64 timeout; int err, i; @@ -7168,23 +7378,33 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, else if (!(nlmsg_flags & NLM_F_EXCL)) { err = 0; if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) { - update_flags = 0; + struct nft_elem_update update = { }; + if (timeout != nft_set_ext_timeout(ext2)->timeout) { - nft_trans_elem_timeout(trans) = timeout; + update.timeout = timeout; if (expiration == 0) expiration = timeout; - update_flags |= NFT_TRANS_UPD_TIMEOUT; + update.flags |= NFT_TRANS_UPD_TIMEOUT; } if (expiration) { - nft_trans_elem_expiration(trans) = expiration; - update_flags |= NFT_TRANS_UPD_EXPIRATION; + update.expiration = expiration; + update.flags |= NFT_TRANS_UPD_EXPIRATION; } - if (update_flags) { - nft_trans_elem_priv(trans) = elem_priv; - nft_trans_elem_update_flags(trans) = update_flags; - nft_trans_commit_list_add_tail(ctx->net, trans); + if (update.flags) { + struct nft_trans_one_elem *ue; + + ue = &nft_trans_container_elem(trans)->elems[0]; + + ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL); + if (!ue->update) { + err = -ENOMEM; + goto err_element_clash; + } + + ue->priv = elem_priv; + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); goto err_elem_free; } } @@ -7207,8 +7427,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } } - nft_trans_elem_priv(trans) = elem.priv; - nft_trans_commit_list_add_tail(ctx->net, trans); + nft_trans_container_elem(trans)->elems[0].priv = elem.priv; + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); return 0; err_set_full: @@ -7345,6 +7565,55 @@ void nft_setelem_data_deactivate(const struct net *net, nft_use_dec(&(*nft_set_ext_obj(ext))->use); } +/* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem. + * No notifications and no ndeact changes. + * + * Returns true if set had been added to (i.e., elements need to be removed again). + */ +static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx, + struct nft_trans_elem *te) +{ + bool removed = false; + int i; + + for (i = 0; i < te->nelems; i++) { + if (te->elems[i].update) { + kfree(te->elems[i].update); + te->elems[i].update = NULL; + /* Update request, so do not release this element */ + te->elems[i].priv = NULL; + continue; + } + + if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv)) + nft_setelem_remove(ctx->net, te->set, te->elems[i].priv); + + if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) + atomic_dec(&te->set->nelems); + + removed = true; + } + + return removed; +} + +/* Called from abort path to undo DELSETELEM/DESTROYSETELEM. 
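 *
 * (Editor's note, not part of the upstream patch: "undo" means that when
 * a batch that deleted elements is aborted, each element must become
 * active again and the set's deactivated count must be restored. Per
 * element, with priv standing for te->elems[i].priv, that is:
 *
 *	if (!nft_setelem_active_next(ctx->net, te->set, priv)) {
 *		nft_setelem_data_activate(ctx->net, te->set, priv);
 *		nft_setelem_activate(ctx->net, te->set, priv);
 *	}
 *	if (!nft_setelem_is_catchall(te->set, priv))
 *		te->set->ndeact--;
 *
 * which is what the loop below performs for each entry.)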
*/ +static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx, + const struct nft_trans_elem *te) +{ + int i; + + for (i = 0; i < te->nelems; i++) { + if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) { + nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv); + nft_setelem_activate(ctx->net, te->set, te->elems[i].priv); + } + + if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) + te->set->ndeact--; + } +} + static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -7424,8 +7693,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_setelem_data_deactivate(ctx->net, set, elem.priv); - nft_trans_elem_priv(trans) = elem.priv; - nft_trans_commit_list_add_tail(ctx->net, trans); + nft_trans_container_elem(trans)->elems[0].priv = elem.priv; + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); return 0; fail_ops: @@ -7451,7 +7720,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, return 0; trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, - sizeof(struct nft_trans_elem), GFP_ATOMIC); + struct_size_t(struct nft_trans_elem, elems, 1), + GFP_ATOMIC); if (!trans) return -ENOMEM; @@ -7460,8 +7730,9 @@ static int nft_setelem_flush(const struct nft_ctx *ctx, nft_setelem_data_deactivate(ctx->net, set, elem_priv); nft_trans_elem_set(trans) = set; - nft_trans_elem_priv(trans) = elem_priv; - nft_trans_commit_list_add_tail(ctx->net, trans); + nft_trans_container_elem(trans)->nelems = 1; + nft_trans_container_elem(trans)->elems[0].priv = elem_priv; + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC); return 0; } @@ -7472,15 +7743,13 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx, { struct nft_trans *trans; - trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, - sizeof(struct nft_trans_elem), GFP_KERNEL); + trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); if (!trans) return -ENOMEM; nft_setelem_data_deactivate(ctx->net, set, elem_priv); - nft_trans_elem_set(trans) = set; - nft_trans_elem_priv(trans) = elem_priv; - nft_trans_commit_list_add_tail(ctx->net, trans); + nft_trans_container_elem(trans)->elems[0].priv = elem_priv; + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); return 0; } @@ -7493,7 +7762,8 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, struct nft_set_ext *ext; int ret = 0; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_rcu(catchall, &set->catchall_list, list, + lockdep_commit_lock_is_held(ctx->net)) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_active(ext, genmask)) continue; @@ -7543,7 +7813,7 @@ static int nf_tables_delsetelem(struct sk_buff *skb, return PTR_ERR(table); } - set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); + set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); @@ -7790,9 +8060,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, struct nft_trans *trans; int err = -ENOMEM; - if (!try_module_get(type->owner)) - return -ENOENT; - + /* caller must have obtained type->owner reference. 
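 *
 * (Editor's note, not part of the upstream patch: the module reference
 * now travels with the transaction. The expected calling pattern, as
 * wired up in nf_tables_newobj() further down, is approximately:
 *
 *	type = nft_obj_type_get(net, objtype, family);
 *	if (IS_ERR(type))
 *		return PTR_ERR(type);
 *	err = nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
 *
 * with the type->owner reference dropped when the transaction object is
 * released.)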
*/ trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) @@ -7860,15 +8128,16 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype, family); - if (WARN_ON_ONCE(!type)) - return -ENOENT; - if (!obj->ops->update) return 0; + type = nft_obj_type_get(net, objtype, family); + if (WARN_ON_ONCE(IS_ERR(type))) + return PTR_ERR(type); + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + /* type->owner reference is put when transaction object is released. */ return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj); } @@ -8104,7 +8373,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) return 0; } -/* called with rcu_read_lock held */ +/* Caller must hold rcu read lock or transaction mutex */ static struct sk_buff * nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, const struct nlattr * const nla[], bool reset) @@ -8373,12 +8642,14 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 }, }; -struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, +struct nft_flowtable *nft_flowtable_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nft_flowtable *flowtable; - list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + list_for_each_entry_rcu(flowtable, &table->flowtables, list, + lockdep_commit_lock_is_held(net)) { if (!nla_strcmp(nla, flowtable->name) && nft_active_genmask(flowtable, genmask)) return flowtable; @@ -8734,7 +9005,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, return PTR_ERR(table); } - flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME], genmask); if (IS_ERR(flowtable)) { err = PTR_ERR(flowtable); @@ -8928,7 +9199,7 @@ static int nf_tables_delflowtable(struct sk_buff *skb, flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask); } else { attr = nla[NFTA_FLOWTABLE_NAME]; - flowtable = nft_flowtable_lookup(table, attr, genmask); + flowtable = nft_flowtable_lookup(net, table, attr, genmask); } if (IS_ERR(flowtable)) { @@ -8998,7 +9269,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, if (!hook_list) hook_list = &flowtable->hook_list; - list_for_each_entry_rcu(hook, hook_list, list) { + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name)) goto nla_put_failure; } @@ -9140,7 +9412,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, return PTR_ERR(table); } - flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME], genmask); if (IS_ERR(flowtable)) { NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); @@ -9658,9 +9930,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - nf_tables_set_elem_destroy(&ctx, - nft_trans_elem_set(trans), - nft_trans_elem_priv(trans)); + nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: @@ -10233,9 +10503,24 @@ static void nf_tables_commit_audit_free(struct list_head *adl) } } +/* nft audit emits the number of elements that get 
added/removed/updated, + * so NEW/DELSETELEM needs to increment based on the total elem count. + */ +static unsigned int nf_tables_commit_audit_entrycount(const struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWSETELEM: + case NFT_MSG_DELSETELEM: + return nft_trans_container_elem(trans)->nelems; + } + + return 1; +} + static void nf_tables_commit_audit_collect(struct list_head *adl, - struct nft_table *table, u32 op) + const struct nft_trans *trans, u32 op) { + const struct nft_table *table = trans->table; struct nft_audit_data *adp; list_for_each_entry(adp, adl, list) { @@ -10245,7 +10530,7 @@ static void nf_tables_commit_audit_collect(struct list_head *adl, WARN_ONCE(1, "table=%s not expected in commit list", table->name); return; found: - adp->entries++; + adp->entries += nf_tables_commit_audit_entrycount(trans); if (!adp->op || adp->op > op) adp->op = op; } @@ -10404,7 +10689,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_ctx_update(&ctx, trans); - nf_tables_commit_audit_collect(&adl, table, trans->msg_type); + nf_tables_commit_audit_collect(&adl, trans, trans->msg_type); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { @@ -10513,25 +10798,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = nft_trans_container_elem(trans); - if (te->update_flags) { - const struct nft_set_ext *ext = - nft_set_elem_ext(te->set, te->elem_priv); + nft_trans_elems_add(&ctx, te); - if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) { - WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, - te->timeout); - } - if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) { - WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, - get_jiffies_64() + te->expiration); - } - } else { - nft_setelem_activate(net, te->set, te->elem_priv); - } - - nf_tables_setelem_notify(&ctx, te->set, - te->elem_priv, - NFT_MSG_NEWSETELEM); if (te->set->ops->commit && list_empty(&te->set->pending_update)) { list_add_tail(&te->set->pending_update, @@ -10543,14 +10811,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSETELEM: te = nft_trans_container_elem(trans); - nf_tables_setelem_notify(&ctx, te->set, - te->elem_priv, - trans->msg_type); - nft_setelem_remove(net, te->set, te->elem_priv); - if (!nft_setelem_is_catchall(te->set, te->elem_priv)) { - atomic_dec(&te->set->nelems); - te->set->ndeact--; - } + nft_trans_elems_remove(&ctx, te); + if (te->set->ops->commit && list_empty(&te->set->pending_update)) { list_add_tail(&te->set->pending_update, @@ -10670,8 +10932,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: - nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem_priv(trans), true); + nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans)); break; case NFT_MSG_NEWOBJ: nft_obj_destroy(&ctx, nft_trans_obj(trans)); @@ -10828,18 +11089,15 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - if (nft_trans_elem_update_flags(trans) || - nft_trans_elem_set_bound(trans)) { + if (nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } te = nft_trans_container_elem(trans); - if (!te->set->ops->abort || - nft_setelem_is_catchall(te->set, te->elem_priv)) - nft_setelem_remove(net, te->set, te->elem_priv); - - if (!nft_setelem_is_catchall(te->set, te->elem_priv)) - 
atomic_dec(&te->set->nelems); + if (!nft_trans_elems_new_abort(&ctx, te)) { + nft_trans_destroy(trans); + break; + } if (te->set->ops->abort && list_empty(&te->set->pending_update)) { @@ -10851,12 +11109,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DESTROYSETELEM: te = nft_trans_container_elem(trans); - if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { - nft_setelem_data_activate(net, te->set, te->elem_priv); - nft_setelem_activate(net, te->set, te->elem_priv); - } - if (!nft_setelem_is_catchall(te->set, te->elem_priv)) - te->set->ndeact--; + nft_trans_elems_destroy_abort(&ctx, te); if (te->set->ops->abort && list_empty(&te->set->pending_update)) { diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 7784ec094097..e598a2a252b0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -517,7 +517,7 @@ replay_abort: err = nla_parse_deprecated(cda, ss->cb[cb_id].attr_count, attr, attrlen, - ss->cb[cb_id].policy, NULL); + ss->cb[cb_id].policy, &extack); if (err < 0) goto ack; diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 7de95674fd8c..d550910aabec 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -17,6 +17,7 @@ struct nft_bitwise { u8 sreg; + u8 sreg2; u8 dreg; enum nft_bitwise_ops op:8; u8 len; @@ -25,8 +26,8 @@ struct nft_bitwise { struct nft_data data; }; -static void nft_bitwise_eval_bool(u32 *dst, const u32 *src, - const struct nft_bitwise *priv) +static void nft_bitwise_eval_mask_xor(u32 *dst, const u32 *src, + const struct nft_bitwise *priv) { unsigned int i; @@ -60,28 +61,72 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src, } } +static void nft_bitwise_eval_and(u32 *dst, const u32 *src, const u32 *src2, + const struct nft_bitwise *priv) +{ + unsigned int i, n; + + for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++) + dst[i] = src[i] & src2[i]; +} + +static void nft_bitwise_eval_or(u32 *dst, const u32 *src, const u32 *src2, + const struct nft_bitwise *priv) +{ + unsigned int i, n; + + for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++) + dst[i] = src[i] | src2[i]; +} + +static void nft_bitwise_eval_xor(u32 *dst, const u32 *src, const u32 *src2, + const struct nft_bitwise *priv) +{ + unsigned int i, n; + + for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++) + dst[i] = src[i] ^ src2[i]; +} + void nft_bitwise_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_bitwise *priv = nft_expr_priv(expr); - const u32 *src = &regs->data[priv->sreg]; + const u32 *src = &regs->data[priv->sreg], *src2; u32 *dst = &regs->data[priv->dreg]; - switch (priv->op) { - case NFT_BITWISE_BOOL: - nft_bitwise_eval_bool(dst, src, priv); - break; - case NFT_BITWISE_LSHIFT: + if (priv->op == NFT_BITWISE_MASK_XOR) { + nft_bitwise_eval_mask_xor(dst, src, priv); + return; + } + if (priv->op == NFT_BITWISE_LSHIFT) { nft_bitwise_eval_lshift(dst, src, priv); - break; - case NFT_BITWISE_RSHIFT: + return; + } + if (priv->op == NFT_BITWISE_RSHIFT) { nft_bitwise_eval_rshift(dst, src, priv); - break; + return; + } + + src2 = priv->sreg2 ?
&regs->data[priv->sreg2] : priv->data.data; + + if (priv->op == NFT_BITWISE_AND) { + nft_bitwise_eval_and(dst, src, src2, priv); + return; + } + if (priv->op == NFT_BITWISE_OR) { + nft_bitwise_eval_or(dst, src, src2, priv); + return; + } + if (priv->op == NFT_BITWISE_XOR) { + nft_bitwise_eval_xor(dst, src, src2, priv); + return; } } static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_SREG2] = { .type = NLA_U32 }, [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, @@ -90,8 +135,8 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { [NFTA_BITWISE_DATA] = { .type = NLA_NESTED }, }; -static int nft_bitwise_init_bool(struct nft_bitwise *priv, - const struct nlattr *const tb[]) +static int nft_bitwise_init_mask_xor(struct nft_bitwise *priv, + const struct nlattr *const tb[]) { struct nft_data_desc mask = { .type = NFT_DATA_VALUE, @@ -105,7 +150,8 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv, }; int err; - if (tb[NFTA_BITWISE_DATA]) + if (tb[NFTA_BITWISE_DATA] || + tb[NFTA_BITWISE_SREG2]) return -EINVAL; if (!tb[NFTA_BITWISE_MASK] || @@ -139,7 +185,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, int err; if (tb[NFTA_BITWISE_MASK] || - tb[NFTA_BITWISE_XOR]) + tb[NFTA_BITWISE_XOR] || + tb[NFTA_BITWISE_SREG2]) return -EINVAL; if (!tb[NFTA_BITWISE_DATA]) @@ -157,6 +204,41 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, return 0; } +static int nft_bitwise_init_bool(const struct nft_ctx *ctx, + struct nft_bitwise *priv, + const struct nlattr *const tb[]) +{ + int err; + + if (tb[NFTA_BITWISE_MASK] || + tb[NFTA_BITWISE_XOR]) + return -EINVAL; + + if ((!tb[NFTA_BITWISE_DATA] && !tb[NFTA_BITWISE_SREG2]) || + (tb[NFTA_BITWISE_DATA] && tb[NFTA_BITWISE_SREG2])) + return -EINVAL; + + if (tb[NFTA_BITWISE_DATA]) { + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data), + .len = priv->len, + }; + + err = nft_data_init(NULL, &priv->data, &desc, + tb[NFTA_BITWISE_DATA]); + if (err < 0) + return err; + } else { + err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG2], + &priv->sreg2, priv->len); + if (err < 0) + return err; + } + + return 0; +} + static int nft_bitwise_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -185,32 +267,40 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, if (tb[NFTA_BITWISE_OP]) { priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])); switch (priv->op) { - case NFT_BITWISE_BOOL: + case NFT_BITWISE_MASK_XOR: case NFT_BITWISE_LSHIFT: case NFT_BITWISE_RSHIFT: + case NFT_BITWISE_AND: + case NFT_BITWISE_OR: + case NFT_BITWISE_XOR: break; default: return -EOPNOTSUPP; } } else { - priv->op = NFT_BITWISE_BOOL; + priv->op = NFT_BITWISE_MASK_XOR; } switch(priv->op) { - case NFT_BITWISE_BOOL: - err = nft_bitwise_init_bool(priv, tb); + case NFT_BITWISE_MASK_XOR: + err = nft_bitwise_init_mask_xor(priv, tb); break; case NFT_BITWISE_LSHIFT: case NFT_BITWISE_RSHIFT: err = nft_bitwise_init_shift(priv, tb); break; + case NFT_BITWISE_AND: + case NFT_BITWISE_OR: + case NFT_BITWISE_XOR: + err = nft_bitwise_init_bool(ctx, priv, tb); + break; } return err; } -static int nft_bitwise_dump_bool(struct sk_buff *skb, - const struct nft_bitwise *priv) +static int nft_bitwise_dump_mask_xor(struct sk_buff *skb, + const struct nft_bitwise *priv) { if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
NFT_DATA_VALUE, priv->len) < 0) @@ -232,6 +322,21 @@ static int nft_bitwise_dump_shift(struct sk_buff *skb, return 0; } +static int nft_bitwise_dump_bool(struct sk_buff *skb, + const struct nft_bitwise *priv) +{ + if (priv->sreg2) { + if (nft_dump_register(skb, NFTA_BITWISE_SREG2, priv->sreg2)) + return -1; + } else { + if (nft_data_dump(skb, NFTA_BITWISE_DATA, &priv->data, + NFT_DATA_VALUE, sizeof(u32)) < 0) + return -1; + } + + return 0; +} + static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { @@ -248,13 +353,18 @@ static int nft_bitwise_dump(struct sk_buff *skb, return -1; switch (priv->op) { - case NFT_BITWISE_BOOL: - err = nft_bitwise_dump_bool(skb, priv); + case NFT_BITWISE_MASK_XOR: + err = nft_bitwise_dump_mask_xor(skb, priv); break; case NFT_BITWISE_LSHIFT: case NFT_BITWISE_RSHIFT: err = nft_bitwise_dump_shift(skb, priv); break; + case NFT_BITWISE_AND: + case NFT_BITWISE_OR: + case NFT_BITWISE_XOR: + err = nft_bitwise_dump_bool(skb, priv); + break; } return err; @@ -269,7 +379,7 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx, const struct nft_bitwise *priv = nft_expr_priv(expr); struct nft_offload_reg *reg = &ctx->regs[priv->dreg]; - if (priv->op != NFT_BITWISE_BOOL) + if (priv->op != NFT_BITWISE_MASK_XOR) return -EOPNOTSUPP; if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) || @@ -299,6 +409,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track, track->regs[priv->dreg].bitwise && track->regs[priv->dreg].bitwise->ops == expr->ops && priv->sreg == bitwise->sreg && + priv->sreg2 == bitwise->sreg2 && priv->dreg == bitwise->dreg && priv->op == bitwise->op && priv->len == bitwise->len && @@ -375,7 +486,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - if (tb[NFTA_BITWISE_DATA]) + if (tb[NFTA_BITWISE_DATA] || + tb[NFTA_BITWISE_SREG2]) return -EINVAL; if (!tb[NFTA_BITWISE_MASK] || @@ -406,7 +518,7 @@ nft_bitwise_fast_dump(struct sk_buff *skb, return -1; if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32)))) return -1; - if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL))) + if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_MASK_XOR))) return -1; data.data[0] = priv->mask; @@ -501,7 +613,7 @@ nft_bitwise_select_ops(const struct nft_ctx *ctx, return &nft_bitwise_ops; if (tb[NFTA_BITWISE_OP] && - ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL) + ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_MASK_XOR) return &nft_bitwise_ops; return &nft_bitwise_fast_ops; diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 2f732fae5a83..3b474d235663 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -8,7 +8,7 @@ #include <linux/spinlock.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_tables.h> -#include <net/ip.h> /* for ipv4 options. 
*/ +#include <net/ip.h> #include <net/inet_dscp.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> @@ -236,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; - fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK; + fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb))); fl.u.ip4.flowi4_mark = pkt->skb->mark; fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; break; @@ -409,8 +409,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx, if (!tb[NFTA_FLOW_TABLE_NAME]) return -EINVAL; - flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME], - genmask); + flowtable = nft_flowtable_lookup(ctx->net, ctx->table, + tb[NFTA_FLOW_TABLE_NAME], genmask); if (IS_ERR(flowtable)) return PTR_ERR(flowtable); diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 1caa04619dc6..12390d2e994f 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -88,13 +88,15 @@ bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, } static struct nft_bitmap_elem * -nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, +nft_bitmap_elem_find(const struct net *net, + const struct nft_set *set, struct nft_bitmap_elem *this, u8 genmask) { const struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be; - list_for_each_entry_rcu(be, &priv->list, head) { + list_for_each_entry_rcu(be, &priv->list, head, + lockdep_is_held(&nft_pernet(net)->commit_mutex)) { if (memcmp(nft_set_ext_key(&be->ext), nft_set_ext_key(&this->ext), set->klen) || !nft_set_elem_active(&be->ext, genmask)) @@ -132,7 +134,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_next(net); u32 idx, off; - be = nft_bitmap_elem_find(set, new, genmask); + be = nft_bitmap_elem_find(net, set, new, genmask); if (be) { *elem_priv = &be->priv; return -EEXIST; @@ -201,7 +203,7 @@ nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, nft_bitmap_location(set, elem->key.val.data, &idx, &off); - be = nft_bitmap_elem_find(set, this, genmask); + be = nft_bitmap_elem_find(net, set, this, genmask); if (!be) return NULL; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index daa56dda737a..65bd291318f2 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -647,7 +647,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, int i; for (i = 0; i < priv->buckets; i++) { - hlist_for_each_entry_rcu(he, &priv->table[i], node) { + hlist_for_each_entry_rcu(he, &priv->table[i], node, + lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) { if (iter->count < iter->skip) goto cont; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 5c6ed68cc6e0..681301b46aa4 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -497,10 +497,7 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx, } if (tb[NFTA_TUNNEL_KEY_TOS]) info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]); - if (tb[NFTA_TUNNEL_KEY_TTL]) - info.key.ttl = nla_get_u8(tb[NFTA_TUNNEL_KEY_TTL]); - else - info.key.ttl = U8_MAX; + info.key.ttl = nla_get_u8_default(tb[NFTA_TUNNEL_KEY_TTL], U8_MAX); if (tb[NFTA_TUNNEL_KEY_OPTS]) { err = nft_tunnel_obj_opts_init(ctx, tb[NFTA_TUNNEL_KEY_OPTS], diff --git 
a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 689eaa2afbec..079fe72a6384 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -107,11 +107,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info, switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (info->attrs[NLBL_MGMT_A_FAMILY]) - entry->family = - nla_get_u16(info->attrs[NLBL_MGMT_A_FAMILY]); - else - entry->family = AF_UNSPEC; + entry->family = + nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY], + AF_UNSPEC); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -601,10 +599,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) struct netlbl_dom_map *entry; u16 family; - if (info->attrs[NLBL_MGMT_A_FAMILY]) - family = nla_get_u16(info->attrs[NLBL_MGMT_A_FAMILY]); - else - family = AF_INET; + family = nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY], AF_INET); ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8953c4dc3f08..dd3517b0fdfd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2269,7 +2269,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being - * required, but it makes sense to _attempt_ a 16K bytes allocation + * required, but it makes sense to _attempt_ a 32KiB allocation * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. */ @@ -2291,7 +2291,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; /* Trim skb to allocated size. User is expected to provide buffer as - * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at + * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as * could fit within the allocated skb. 
skb is typically allocated * with larger space than required (could be as much as near 2x the @@ -2925,12 +2925,8 @@ static int __init netlink_proto_init(void) for (i = 0; i < MAX_LINKS; i++) { if (rhashtable_init(&nl_table[i].hash, - &netlink_rhashtable_params) < 0) { - while (--i > 0) - rhashtable_destroy(&nl_table[i].hash); - kfree(nl_table); + &netlink_rhashtable_params) < 0) goto panic; - } } netlink_add_usersock_entry(); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 07ad65774fe2..104732d34543 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -997,7 +997,7 @@ static int genl_start(struct netlink_callback *cb) info->info.attrs = attrs; genl_info_net_set(&info->info, sock_net(cb->skb->sk)); info->info.extack = cb->extack; - memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr)); + memset(&info->info.ctx, 0, sizeof(info->info.ctx)); cb->data = info; if (ops->start) { @@ -1104,7 +1104,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, info.attrs = attrbuf; info.extack = extack; genl_info_net_set(&info, net); - memset(&info.user_ptr, 0, sizeof(info.user_ptr)); + memset(&info.ctx, 0, sizeof(info.ctx)); if (ops->pre_doit) { err = ops->pre_doit(ops, skb, &info); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index f456a5911e7d..1ec5955fe469 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -757,6 +757,14 @@ int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) } EXPORT_SYMBOL(nci_core_conn_close); +static void nci_set_target_ats(struct nfc_target *target, struct nci_dev *ndev) +{ + if (ndev->target_ats_len > 0) { + target->ats_len = ndev->target_ats_len; + memcpy(target->ats, ndev->target_ats, target->ats_len); + } +} + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); @@ -939,8 +947,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT)); } - if (!rc) + if (!rc) { ndev->target_active_prot = protocol; + if (protocol == NFC_PROTO_ISO14443) + nci_set_target_ats(target, ndev); + } return rc; } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 994a0a1efb58..a818eff27e6b 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -402,7 +402,7 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, switch (ntf->activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: nfca_poll = &ntf->activation_params.nfca_poll_iso_dep; - nfca_poll->rats_res_len = min_t(__u8, *data++, 20); + nfca_poll->rats_res_len = min_t(__u8, *data++, NFC_ATS_MAXSIZE); pr_debug("rats_res_len %d\n", nfca_poll->rats_res_len); if (nfca_poll->rats_res_len > 0) { memcpy(nfca_poll->rats_res, @@ -531,6 +531,28 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, return NCI_STATUS_OK; } +static int nci_store_ats_nfc_iso_dep(struct nci_dev *ndev, + const struct nci_rf_intf_activated_ntf *ntf) +{ + ndev->target_ats_len = 0; + + if (ntf->activation_params_len <= 0) + return NCI_STATUS_OK; + + if (ntf->activation_params.nfca_poll_iso_dep.rats_res_len > NFC_ATS_MAXSIZE) { + pr_debug("ATS too long\n"); + return NCI_STATUS_RF_PROTOCOL_ERROR; + } + + if (ntf->activation_params.nfca_poll_iso_dep.rats_res_len > 0) { + ndev->target_ats_len = ntf->activation_params.nfca_poll_iso_dep.rats_res_len; + memcpy(ndev->target_ats, ntf->activation_params.nfca_poll_iso_dep.rats_res, + ndev->target_ats_len); + } + + return NCI_STATUS_OK; +} + static void nci_rf_intf_activated_ntf_packet(struct nci_dev 
*ndev, const struct sk_buff *skb) { @@ -660,6 +682,14 @@ exit: if (err != NCI_STATUS_OK) pr_err("unable to store general bytes\n"); } + + /* store ATS to be reported later in nci_activate_target */ + if (ntf.rf_interface == NCI_RF_INTERFACE_ISO_DEP && + ntf.activation_rf_tech_and_mode == NCI_NFC_A_PASSIVE_POLL_MODE) { + err = nci_store_ats_nfc_iso_dep(ndev, &ntf); + if (err != NCI_STATUS_OK) + pr_err("unable to store ATS\n"); + } } if (!(ntf.activation_rf_tech_and_mode & NCI_RF_TECH_MODE_LISTEN_MASK)) { diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index dd2ce73a24fb..6a40b8d0350d 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -96,6 +96,11 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, goto nla_put_failure; } + if (target->ats_len > 0 && + nla_put(msg, NFC_ATTR_TARGET_ATS, target->ats_len, + target->ats)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 78d9961fcd44..225f6048867f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1828,8 +1828,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.dp = dp; parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX] - ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0; + parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0); /* So far only local changes have been made, now need the lock. */ ovs_lock(); @@ -2266,8 +2265,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL) return -EOPNOTSUPP; - port_no = a[OVS_VPORT_ATTR_PORT_NO] - ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; + port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0); if (port_no >= DP_MAX_PORTS) return -EFBIG; @@ -2304,8 +2302,8 @@ restart: parms.dp = dp; parms.port_no = port_no; parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; - parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX] - ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0; + parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX], + 0); vport = new_vport(&parms); err = PTR_ERR(vport); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 729ef582a3a8..881ddd3696d5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1938,7 +1938,7 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) { - return attr ? 
nla_get_u32(attr) : 0; + return nla_get_u32_default(attr, 0); } /** diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 5858d65ea1a9..2412d7813d24 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -195,7 +195,6 @@ static int internal_dev_recv(struct sk_buff *skb) skb_dst_drop(skb); nf_reset_ct(skb); - secpath_reset(skb); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a705ec214254..886c0dd47b66 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1846,21 +1846,22 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -EINVAL; spin_lock(&po->bind_lock); - if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && + if (po->num && match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { - __dev_remove_pack(&po->prot_hook); - /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ WRITE_ONCE(po->fanout, match); po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); - __fanout_link(sk, po); + if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { + __dev_remove_pack(&po->prot_hook); + __fanout_link(sk, po); + } err = 0; } } @@ -2118,7 +2119,7 @@ retry: skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); - skb_setup_tx_timestamp(skb, sockc.tsflags); + skb_setup_tx_timestamp(skb, &sockc); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2650,7 +2651,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); - skb_setup_tx_timestamp(skb, sockc->tsflags); + skb_setup_tx_timestamp(skb, sockc); skb_zcopy_set_nouarg(skb, ph.raw); skb_reserve(skb, hlen); @@ -3115,7 +3116,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } - skb_setup_tx_timestamp(skb, sockc.tsflags); + skb_setup_tx_timestamp(skb, &sockc); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { @@ -3421,17 +3422,17 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; + po = pkt_sk(sk); + err = packet_alloc_pending(po); + if (err) + goto out_sk_free; + sock_init_data(sock, sk); - po = pkt_sk(sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; - err = packet_alloc_pending(po); - if (err) - goto out2; - packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; @@ -3463,7 +3464,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock_prot_inuse_add(net, &packet_proto, 1); return 0; -out2: +out_sk_free: sk_free(sk); out: return err; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index cde671d29d5d..5c36bae37b8f 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -22,7 +22,7 @@ #include <net/phonet/pn_dev.h> struct phonet_routes { - struct mutex lock; + spinlock_t lock; struct net_device __rcu *table[64]; }; @@ -54,7 +54,7 @@ static struct phonet_device *__phonet_device_alloc(struct net_device 
*dev) pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); - BUG_ON(!mutex_is_locked(&pndevs->lock)); + lockdep_assert_held(&pndevs->lock); list_add_rcu(&pnd->list, &pndevs->list); return pnd; } @@ -64,7 +64,8 @@ static struct phonet_device *__phonet_get(struct net_device *dev) struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; - BUG_ON(!mutex_is_locked(&pndevs->lock)); + lockdep_assert_held(&pndevs->lock); + list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; @@ -91,17 +92,22 @@ static void phonet_device_destroy(struct net_device *dev) ASSERT_RTNL(); - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + pnd = __phonet_get(dev); if (pnd) list_del_rcu(&pnd->list); - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); if (pnd) { + struct net *net = dev_net(dev); + u32 ifindex = dev->ifindex; u8 addr; for_each_set_bit(addr, pnd->addrs, 64) - phonet_address_notify(RTM_DELADDR, dev, addr); + phonet_address_notify(net, RTM_DELADDR, ifindex, addr); + kfree(pnd); } } @@ -133,7 +139,8 @@ int phonet_address_add(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) @@ -142,7 +149,9 @@ int phonet_address_add(struct net_device *dev, u8 addr) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); + return err; } @@ -152,7 +161,8 @@ int phonet_address_del(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; @@ -161,7 +171,8 @@ int phonet_address_del(struct net_device *dev, u8 addr) list_del_rcu(&pnd->list); else pnd = NULL; - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); if (pnd) kfree_rcu(pnd, rcu); @@ -244,32 +255,39 @@ static int phonet_device_autoconf(struct net_device *dev) ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device); if (ret) return ret; - phonet_address_notify(RTM_NEWADDR, dev, - req.ifr_phonet_autoconf.device); + + phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex, + req.ifr_phonet_autoconf.device); return 0; } static void phonet_route_autodel(struct net_device *dev) { - struct phonet_net *pnn = phonet_pernet(dev_net(dev)); - unsigned int i; + struct net *net = dev_net(dev); DECLARE_BITMAP(deleted, 64); + u32 ifindex = dev->ifindex; + struct phonet_net *pnn; + unsigned int i; + + pnn = phonet_pernet(net); /* Remove left-over Phonet routes */ bitmap_zero(deleted, 64); - mutex_lock(&pnn->routes.lock); - for (i = 0; i < 64; i++) + + spin_lock(&pnn->routes.lock); + for (i = 0; i < 64; i++) { if (rcu_access_pointer(pnn->routes.table[i]) == dev) { RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } - mutex_unlock(&pnn->routes.lock); + } + spin_unlock(&pnn->routes.lock); if (bitmap_empty(deleted, 64)) return; /* short-circuit RCU */ synchronize_rcu(); for_each_set_bit(i, deleted, 64) { - rtm_phonet_notify(RTM_DELROUTE, dev, i); + rtm_phonet_notify(net, RTM_DELROUTE, ifindex, i); dev_put(dev); } } @@ -309,8 +327,8 @@ static int __net_init phonet_init_net(struct net *net) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); - mutex_init(&pnn->pndevs.lock); - mutex_init(&pnn->routes.lock); + spin_lock_init(&pnn->pndevs.lock); + 
spin_lock_init(&pnn->routes.lock); return 0; } @@ -360,13 +378,15 @@ int phonet_route_add(struct net_device *dev, u8 daddr) int err = -EEXIST; daddr = daddr >> 2; - mutex_lock(&routes->lock); + + spin_lock(&routes->lock); if (routes->table[daddr] == NULL) { rcu_assign_pointer(routes->table[daddr], dev); dev_hold(dev); err = 0; } - mutex_unlock(&routes->lock); + spin_unlock(&routes->lock); + return err; } @@ -376,17 +396,19 @@ int phonet_route_del(struct net_device *dev, u8 daddr) struct phonet_routes *routes = &pnn->routes; daddr = daddr >> 2; - mutex_lock(&routes->lock); + + spin_lock(&routes->lock); if (rcu_access_pointer(routes->table[daddr]) == dev) RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; - mutex_unlock(&routes->lock); + spin_unlock(&routes->lock); if (!dev) return -ENOENT; - synchronize_rcu(); - dev_put(dev); + + /* Note : our caller must call synchronize_rcu() and dev_put(dev) */ + return 0; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 894e5c72d6bf..b9043c92dc24 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -19,10 +19,10 @@ /* Device address handling */ -static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, +static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, u32 portid, u32 seq, int event); -void phonet_address_notify(int event, struct net_device *dev, u8 addr) +void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr) { struct sk_buff *skb; int err = -ENOBUFS; @@ -31,17 +31,18 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr) nla_total_size(1), GFP_KERNEL); if (skb == NULL) goto errout; - err = fill_addr(skb, dev, addr, 0, 0, event); + + err = fill_addr(skb, ifindex, addr, 0, 0, event); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, dev_net(dev), 0, - RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + + rtnl_notify(skb, net, 0, RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); return; errout: - rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_PHONET_IFADDR, err); } static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { @@ -64,8 +65,6 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - ASSERT_RTNL(); - err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy, extack); if (err < 0) @@ -79,21 +78,29 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, /* Phonet addresses only have 6 high-order bits */ return -EINVAL; - dev = __dev_get_by_index(net, ifm->ifa_index); - if (dev == NULL) + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifm->ifa_index); + if (!dev) { + rcu_read_unlock(); return -ENODEV; + } if (nlh->nlmsg_type == RTM_NEWADDR) err = phonet_address_add(dev, pnaddr); else err = phonet_address_del(dev, pnaddr); + + rcu_read_unlock(); + if (!err) - phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); + phonet_address_notify(net, nlh->nlmsg_type, ifm->ifa_index, pnaddr); + return err; } -static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 portid, u32 seq, int event) +static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, + u32 portid, u32 seq, int event) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; @@ -107,7 +114,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ifm->ifa_prefixlen = 0; ifm->ifa_flags = IFA_F_PERMANENT; ifm->ifa_scope = RT_SCOPE_LINK; - ifm->ifa_index = 
dev->ifindex; + ifm->ifa_index = ifindex; if (nla_put_u8(skb, IFA_LOCAL, addr)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -120,14 +127,17 @@ nla_put_failure: static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + int addr_idx = 0, addr_start_idx = cb->args[1]; + int dev_idx = 0, dev_start_idx = cb->args[0]; struct phonet_device_list *pndevs; struct phonet_device *pnd; - int dev_idx = 0, dev_start_idx = cb->args[0]; - int addr_idx = 0, addr_start_idx = cb->args[1]; + int err = 0; pndevs = phonet_device_list(sock_net(skb->sk)); + rcu_read_lock(); list_for_each_entry_rcu(pnd, &pndevs->list, list) { + DECLARE_BITMAP(addrs, 64); u8 addr; if (dev_idx > dev_start_idx) @@ -136,29 +146,32 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; addr_idx = 0; - for_each_set_bit(addr, pnd->addrs, 64) { + memcpy(addrs, pnd->addrs, sizeof(pnd->addrs)); + + for_each_set_bit(addr, addrs, 64) { if (addr_idx++ < addr_start_idx) continue; - if (fill_addr(skb, pnd->netdev, addr << 2, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) + err = fill_addr(skb, READ_ONCE(pnd->netdev->ifindex), + addr << 2, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWADDR); + if (err < 0) goto out; } } - out: rcu_read_unlock(); + cb->args[0] = dev_idx; cb->args[1] = addr_idx; - return skb->len; + return err; } /* Routes handling */ -static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, - u32 portid, u32 seq, int event) +static int fill_route(struct sk_buff *skb, u32 ifindex, u8 dst, + u32 portid, u32 seq, int event) { struct rtmsg *rtm; struct nlmsghdr *nlh; @@ -177,8 +190,7 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; - if (nla_put_u8(skb, RTA_DST, dst) || - nla_put_u32(skb, RTA_OIF, READ_ONCE(dev->ifindex))) + if (nla_put_u8(skb, RTA_DST, dst) || nla_put_u32(skb, RTA_OIF, ifindex)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -188,7 +200,7 @@ nla_put_failure: return -EMSGSIZE; } -void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) +void rtm_phonet_notify(struct net *net, int event, u32 ifindex, u8 dst) { struct sk_buff *skb; int err = -ENOBUFS; @@ -197,17 +209,18 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) nla_total_size(1) + nla_total_size(4), GFP_KERNEL); if (skb == NULL) goto errout; - err = fill_route(skb, dev, dst, 0, 0, event); + + err = fill_route(skb, ifindex, dst, 0, 0, event); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, dev_net(dev), 0, - RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL); + + rtnl_notify(skb, net, 0, RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL); return; errout: - rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_ROUTE, err); + rtnl_set_sk_err(net, RTNLGRP_PHONET_ROUTE, err); } static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { @@ -220,8 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; + bool sync_needed = false; struct net_device *dev; struct rtmsg *rtm; + u32 ifindex; int err; u8 dst; @@ -231,8 +246,6 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - ASSERT_RTNL(); - err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy, extack); if (err < 0) @@ -247,16 +260,33 @@ static int 
route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (dst & 3) /* Phonet addresses only have 6 high-order bits */ return -EINVAL; - dev = __dev_get_by_index(net, nla_get_u32(tb[RTA_OIF])); - if (dev == NULL) + ifindex = nla_get_u32(tb[RTA_OIF]); + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); return -ENODEV; + } - if (nlh->nlmsg_type == RTM_NEWROUTE) + if (nlh->nlmsg_type == RTM_NEWROUTE) { err = phonet_route_add(dev, dst); - else + } else { err = phonet_route_del(dev, dst); + if (!err) + sync_needed = true; + } + + rcu_read_unlock(); + + if (sync_needed) { + synchronize_rcu(); + dev_put(dev); + } if (!err) - rtm_phonet_notify(nlh->nlmsg_type, dev, dst); + rtm_phonet_notify(net, nlh->nlmsg_type, ifindex, dst); + return err; } @@ -273,7 +303,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (!dev) continue; - err = fill_route(skb, dev, addr << 2, + err = fill_route(skb, READ_ONCE(dev->ifindex), addr << 2, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE); if (err < 0) @@ -286,13 +316,18 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } static const struct rtnl_msg_handler phonet_rtnl_msg_handlers[] __initdata_or_module = { - {THIS_MODULE, PF_PHONET, RTM_NEWADDR, addr_doit, NULL, 0}, - {THIS_MODULE, PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0}, - {THIS_MODULE, PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0}, - {THIS_MODULE, PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0}, - {THIS_MODULE, PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0}, - {THIS_MODULE, PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, - RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_NEWADDR, + .doit = addr_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_DELADDR, + .doit = addr_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_GETADDR, + .dumpit = getaddr_dumpit, .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_NEWROUTE, + .doit = route_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_DELROUTE, + .doit = route_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_GETROUTE, + .dumpit = route_dumpit, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; int __init phonet_netlink_register(void) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 8f070ee7e742..d1cfceeff133 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -40,10 +40,6 @@ #include "rds.h" struct workqueue_struct *rds_ib_mr_wq; -struct rds_ib_dereg_odp_mr { - struct work_struct work; - struct ib_mr *mr; -}; static void rds_ib_odp_mr_worker(struct work_struct *work); diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index c268c2b011f4..a8e21060112f 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -32,8 +32,12 @@ static int rfkill_gpio_set_power(void *data, bool blocked) { struct rfkill_gpio_data *rfkill = data; - if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled) - clk_enable(rfkill->clk); + if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled) { + int ret = clk_enable(rfkill->clk); + + if (ret) + return ret; + } gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked); gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 
1539d315afe7..694c4df7a1a3 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -337,9 +337,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work) */ rxrpc_purge_queue(&conn->rx_queue); - if (conn->tx_data_alloc.va) - __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va), - conn->tx_data_alloc.pagecnt_bias); + page_frag_cache_drain(&conn->tx_data_alloc); call_rcu(&conn->rcu, rxrpc_rcu_free_connection); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index f9623ace2201..2792d2304605 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -452,9 +452,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local) #endif rxrpc_purge_queue(&local->rx_queue); rxrpc_purge_client_connections(local); - if (local->tx_alloc.va) - __page_frag_cache_drain(virt_to_page(local->tx_alloc.va), - local->tx_alloc.pagecnt_bias); + page_frag_cache_drain(&local->tx_alloc); } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 23d18fe5de9f..6abb8eec1b2b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -29,6 +29,7 @@ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, call->send_abort_why = why; call->send_abort_err = error; call->send_abort_seq = 0; + trace_rxrpc_abort_call(call, abort_code); /* Request abort locklessly vs rxrpc_input_call_event(). */ smp_store_release(&call->send_abort, abort_code); rxrpc_poke_call(call, rxrpc_call_poke_abort); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index eecad65fec92..839790043256 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -504,6 +504,50 @@ nla_put_failure: return -1; } +static int +tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + int err = -EINVAL; + u32 flags; + + if (tcf_action_dump_terse(skb, a, false)) + goto nla_put_failure; + + if (a->hw_stats != TCA_ACT_HW_STATS_ANY && + nla_put_bitfield32(skb, TCA_ACT_HW_STATS, + a->hw_stats, TCA_ACT_HW_STATS_ANY)) + goto nla_put_failure; + + if (a->used_hw_stats_valid && + nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS, + a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) + goto nla_put_failure; + + flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; + if (flags && + nla_put_bitfield32(skb, TCA_ACT_FLAGS, + flags, flags)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) + goto nla_put_failure; + + nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + err = tcf_action_dump_old(skb, a, bind, ref); + if (err > 0) { + nla_nest_end(skb, nest); + return err; + } + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { @@ -1190,51 +1234,6 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) return a->ops->dump(skb, a, bind, ref); } -int -tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) -{ - int err = -EINVAL; - unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nest; - u32 flags; - - if (tcf_action_dump_terse(skb, a, false)) - goto nla_put_failure; - - if (a->hw_stats != TCA_ACT_HW_STATS_ANY && - nla_put_bitfield32(skb, TCA_ACT_HW_STATS, - a->hw_stats, TCA_ACT_HW_STATS_ANY)) - goto nla_put_failure; - - if (a->used_hw_stats_valid && - nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS, - a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) - goto nla_put_failure; - - 
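
A note on the two rxrpc hunks above: both collapse the open-coded
"va check + __page_frag_cache_drain(virt_to_page(...), pagecnt_bias)"
sequence into a single page_frag_cache_drain() call. A minimal sketch of
what that helper is assumed to do (the exact mm-side definition may differ):

void page_frag_cache_drain(struct page_frag_cache *nc)
{
	if (!nc->va)
		return;

	__page_frag_cache_drain(virt_to_page(nc->va), nc->pagecnt_bias);
	nc->va = NULL;
}
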
flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; - if (flags && - nla_put_bitfield32(skb, TCA_ACT_FLAGS, - flags, flags)) - goto nla_put_failure; - - if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) - goto nla_put_failure; - - nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - err = tcf_action_dump_old(skb, a, bind, ref); - if (err > 0) { - nla_nest_end(skb, nest); - return err; - } - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} -EXPORT_SYMBOL(tcf_action_dump_1); - int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse) { @@ -2264,13 +2263,16 @@ out_module_put: return skb->len; } +static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWACTION, .doit = tc_ctl_action}, + {.msgtype = RTM_DELACTION, .doit = tc_ctl_action}, + {.msgtype = RTM_GETACTION, .doit = tc_ctl_action, + .dumpit = tc_dump_action}, +}; + static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, - 0); - + rtnl_register_many(tc_action_rtnl_msg_handlers); return 0; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2197eb625658..c02f39efc6ef 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1183,9 +1183,8 @@ static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, range->min_addr.ip = nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]); - range->max_addr.ip = max_attr ? - nla_get_in_addr(max_attr) : - range->min_addr.ip; + range->max_addr.ip = + nla_get_in_addr_default(max_attr, range->min_addr.ip); } else if (tb[TCA_CT_NAT_IPV6_MIN]) { struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX]; @@ -1314,8 +1313,9 @@ static int tcf_ct_fill_params(struct net *net, err = -EINVAL; goto err; } - family = tb[TCA_CT_HELPER_FAMILY] ? nla_get_u8(tb[TCA_CT_HELPER_FAMILY]) : AF_INET; - proto = tb[TCA_CT_HELPER_PROTO] ? nla_get_u8(tb[TCA_CT_HELPER_PROTO]) : IPPROTO_TCP; + family = nla_get_u8_default(tb[TCA_CT_HELPER_FAMILY], AF_INET); + proto = nla_get_u8_default(tb[TCA_CT_HELPER_PROTO], + IPPROTO_TCP); err = nf_ct_add_helper(tmpl, name, family, proto, p->ct_action & TCA_CT_ACT_NAT, &p->helper); if (err) { diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 5dd41a012110..5b1241ddc758 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -197,8 +197,9 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, "dscp mask must be 6 contiguous bits"); return -EINVAL; } - dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ? - nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0; + dscpstatemask = + nla_get_u32_default(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK], + 0); /* mask & statemask must not overlap */ if (dscpmask & dscpstatemask) { NL_SET_ERR_MSG_ATTR(extack, @@ -243,8 +244,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, } cp_new->net = net; - cp_new->zone = tb[TCA_CTINFO_ZONE] ? 
- nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0; + cp_new->zone = nla_get_u16_default(tb[TCA_CTINFO_ZONE], 0); if (dscpmask) { cp_new->dscpmask = dscpmask; cp_new->dscpmaskshift = dscpmaskshift; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 1dd74125398a..91c0ec729823 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -190,15 +190,10 @@ static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry, entry->interval = interval; - if (tb[TCA_GATE_ENTRY_IPV]) - entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]); - else - entry->ipv = -1; + entry->ipv = nla_get_s32_default(tb[TCA_GATE_ENTRY_IPV], -1); - if (tb[TCA_GATE_ENTRY_MAX_OCTETS]) - entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]); - else - entry->maxoctets = -1; + entry->maxoctets = nla_get_s32_default(tb[TCA_GATE_ENTRY_MAX_OCTETS], + -1); return 0; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 44a37a71ae92..9f86f4e666d3 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -288,16 +288,14 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, } p->tcfm_action = parm->m_action; - p->tcfm_label = tb[TCA_MPLS_LABEL] ? nla_get_u32(tb[TCA_MPLS_LABEL]) : - ACT_MPLS_LABEL_NOT_SET; - p->tcfm_tc = tb[TCA_MPLS_TC] ? nla_get_u8(tb[TCA_MPLS_TC]) : - ACT_MPLS_TC_NOT_SET; - p->tcfm_ttl = tb[TCA_MPLS_TTL] ? nla_get_u8(tb[TCA_MPLS_TTL]) : - mpls_ttl; - p->tcfm_bos = tb[TCA_MPLS_BOS] ? nla_get_u8(tb[TCA_MPLS_BOS]) : - ACT_MPLS_BOS_NOT_SET; - p->tcfm_proto = tb[TCA_MPLS_PROTO] ? nla_get_be16(tb[TCA_MPLS_PROTO]) : - htons(ETH_P_MPLS_UC); + p->tcfm_label = nla_get_u32_default(tb[TCA_MPLS_LABEL], + ACT_MPLS_LABEL_NOT_SET); + p->tcfm_tc = nla_get_u8_default(tb[TCA_MPLS_TC], ACT_MPLS_TC_NOT_SET); + p->tcfm_ttl = nla_get_u8_default(tb[TCA_MPLS_TTL], mpls_ttl); + p->tcfm_bos = nla_get_u8_default(tb[TCA_MPLS_BOS], + ACT_MPLS_BOS_NOT_SET); + p->tcfm_proto = nla_get_be16_default(tb[TCA_MPLS_PROTO], + htons(ETH_P_MPLS_UC)); spin_lock_bh(&m->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8555125ed34d..a214ed681142 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -167,8 +167,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } if (R_tab) { new->rate_present = true; - rate64 = tb[TCA_POLICE_RATE64] ? - nla_get_u64(tb[TCA_POLICE_RATE64]) : 0; + rate64 = nla_get_u64_default(tb[TCA_POLICE_RATE64], 0); psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64); qdisc_put_rtab(R_tab); } else { @@ -176,8 +175,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } if (P_tab) { new->peak_present = true; - prate64 = tb[TCA_POLICE_PEAKRATE64] ? - nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0; + prate64 = nla_get_u64_default(tb[TCA_POLICE_PEAKRATE64], 0); psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64); qdisc_put_rtab(P_tab); } else { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bbc778c233c8..7578e27260c9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1933,7 +1933,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, struct tcf_chain_info *chain_info, u32 protocol, u32 prio, - bool prio_allocate); + bool prio_allocate, + struct netlink_ext_ack *extack); /* Try to insert new proto. 
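
Aside on the nla_get_*_default() conversions in the act_ct and act_ctinfo
hunks above (the same pattern repeats throughout this series): each one
folds the open-coded "attr ? nla_get_xxx(attr) : default" ternary into a
single accessor. A minimal sketch of the u32 variant, assuming the whole
family is a trivial inline wrapper in <net/netlink.h>:

static inline u32 nla_get_u32_default(const struct nlattr *nla, u32 defvalue)
{
	if (nla)
		return nla_get_u32(nla);
	return defvalue;
}

The u8/u16/s32/u64/be16 and in_addr variants presumably differ only in the
underlying nla_get_*() call.
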
* If proto with specified priority already exists, free new proto @@ -1957,8 +1958,7 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, return ERR_PTR(-EAGAIN); } - tp = tcf_chain_tp_find(chain, &chain_info, - protocol, prio, false); + tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, NULL); if (!tp) err = tcf_chain_tp_insert(chain, &chain_info, tp_new); mutex_unlock(&chain->filter_chain_lock); @@ -2018,7 +2018,8 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, struct tcf_chain_info *chain_info, u32 protocol, u32 prio, - bool prio_allocate) + bool prio_allocate, + struct netlink_ext_ack *extack) { struct tcf_proto **pprev; struct tcf_proto *tp; @@ -2029,9 +2030,14 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, pprev = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { - if (prio_allocate || - (tp->protocol != protocol && protocol)) + if (prio_allocate) { + NL_SET_ERR_MSG(extack, "Lowest ID from auto-alloc range already in use"); + return ERR_PTR(-ENOSPC); + } + if (tp->protocol != protocol && protocol) { + NL_SET_ERR_MSG(extack, "Protocol mismatch for filter with specified priority"); return ERR_PTR(-EINVAL); + } } else { tp = NULL; } @@ -2297,7 +2303,7 @@ replay: } block->classid = parent; - chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; + chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; @@ -2312,9 +2318,8 @@ replay: mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, - prio, prio_allocate); + prio, prio_allocate, extack); if (IS_ERR(tp)) { - NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = PTR_ERR(tp); goto errout_locked; } @@ -2509,7 +2514,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } - chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; + chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; @@ -2539,10 +2544,13 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, - prio, false); - if (!tp || IS_ERR(tp)) { + prio, false, extack); + if (!tp) { + err = -ENOENT; NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); - err = tp ? PTR_ERR(tp) : -ENOENT; + goto errout_locked; + } else if (IS_ERR(tp)) { + err = PTR_ERR(tp); goto errout_locked; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); @@ -2664,7 +2672,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } - chain_index = tca[TCA_CHAIN] ? 
nla_get_u32(tca[TCA_CHAIN]) : 0; + chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; @@ -2679,11 +2687,14 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, - prio, false); + prio, false, extack); mutex_unlock(&chain->filter_chain_lock); - if (!tp || IS_ERR(tp)) { + if (!tp) { + err = -ENOENT; NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); - err = tp ? PTR_ERR(tp) : -ENOENT; + goto errout; + } else if (IS_ERR(tp)) { + err = PTR_ERR(tp); goto errout; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); @@ -3104,7 +3115,7 @@ replay: if (IS_ERR(block)) return PTR_ERR(block); - chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; + chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; @@ -4056,6 +4067,19 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static const struct rtnl_msg_handler tc_filter_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWTFILTER, .doit = tc_new_tfilter, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_DELTFILTER, .doit = tc_del_tfilter, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_GETTFILTER, .doit = tc_get_tfilter, + .dumpit = tc_dump_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_NEWCHAIN, .doit = tc_ctl_chain}, + {.msgtype = RTM_DELCHAIN, .doit = tc_ctl_chain}, + {.msgtype = RTM_GETCHAIN, .doit = tc_ctl_chain, + .dumpit = tc_dump_chain}, +}; + static int __init tc_filter_init(void) { int err; @@ -4069,17 +4093,7 @@ static int __init tc_filter_init(void) goto err_register_pernet_subsys; xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1); - - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, - tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, - tc_dump_chain, 0); + rtnl_register_many(tc_filter_rtnl_msg_handlers); return 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a1d27bc039a3..300430b8c4d2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2420,6 +2420,17 @@ static struct pernet_operations psched_net_ops = { DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); #endif +static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc}, + {.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc}, + {.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc, + .dumpit = tc_dump_qdisc}, + {.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass}, + {.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass}, + {.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass, + .dumpit = tc_dump_tclass}, +}; + static int __init pktsched_init(void) { int err; @@ -2438,14 +2449,7 @@ static int __init pktsched_init(void) register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); - 
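
The cls_api hunk above, like the act_api and sch_api hunks nearby, replaces
a run of individual rtnl_register() calls with a const handler table handed
to rtnl_register_many(). A hedged sketch of the assumed helper shape: an
unset .protocol presumably defaults to PF_UNSPEC, and registering the table
as a unit allows already-added entries to be unwound if a later one fails:

/* Presumed form; the real helper likely lives in net/core/rtnetlink.c. */
#define rtnl_register_many(handlers) \
	__rtnl_register_many(handlers, ARRAY_SIZE(handlers))
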
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, - 0); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, - 0); + rtnl_register_many(psched_rtnl_msg_handlers); tc_wrapper_init(); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 939425da1895..8c9a0400c862 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -310,7 +310,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; - int port_rate; + s64 port_rate; int err; err = __ethtool_get_link_ksettings(dev, &ecmd); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 91072010923d..1e940ad0d2fa 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -356,7 +356,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, tb[TCA_CHOKE_STAB] == NULL) return -EINVAL; - max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0; + max_P = nla_get_u32_default(tb[TCA_CHOKE_MAX_P], 0); ctl = nla_data(tb[TCA_CHOKE_PARMS]); stab = nla_data(tb[TCA_CHOKE_STAB]); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 19a49af5a9e5..a97638bef6da 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -111,6 +111,7 @@ struct fq_perband_flows { struct fq_sched_data { /* Read mostly cache line */ + u64 offload_horizon; u32 quantum; u32 initial_quantum; u32 flow_refill_delay; @@ -299,7 +300,7 @@ static void fq_gc(struct fq_sched_data *q, } /* Fast path can be used if : - * 1) Packet tstamp is in the past. + * 1) Packet tstamp is in the past, or within the pacing offload horizon. * 2) FQ qlen == 0 OR * (no flow is currently eligible for transmit, * AND fast path queue has less than 8 packets) @@ -314,7 +315,7 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb, const struct fq_sched_data *q = qdisc_priv(sch); const struct sock *sk; - if (fq_skb_cb(skb)->time_to_send > now) + if (fq_skb_cb(skb)->time_to_send > now + q->offload_horizon) return false; if (sch->q.qlen != 0) { @@ -361,8 +362,9 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, * 3) We do not want to rate limit them (eg SYNFLOOD attack), * especially if the listener set SO_MAX_PACING_RATE * 4) We pretend they are orphaned + * TCP can also associate TIME_WAIT sockets with RST or ACK packets. */ - if (!sk || sk_listener(sk)) { + if (!sk || sk_listener_or_tw(sk)) { unsigned long hash = skb_get_hash(skb) & q->orphan_mask; /* By forcing low order bit to 1, we make sure to not @@ -595,15 +597,18 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) unsigned long sample; struct rb_node *p; - if (q->time_next_delayed_flow > now) + if (q->time_next_delayed_flow > now + q->offload_horizon) return; /* Update unthrottle latency EWMA. * This is cheap and can help diagnosing timer/latency problems. 
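
Aside on the offload_horizon additions in this sch_fq hunk series: every
eligibility test is widened from "time_to_send > now" to "time_to_send >
now + q->offload_horizon", so packets due within the device's
pacing-offload window are handed out immediately and the NIC performs the
final pacing. A toy restatement of the widened check (hypothetical helper;
e.g. a 1ms horizon releases a packet due 800us from now while one due 2ms
out is still throttled by the qdisc):

static bool fq_send_now(u64 time_to_send, u64 now, u64 offload_horizon)
{
	return time_to_send <= now + offload_horizon;
}
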
*/ sample = (unsigned long)(now - q->time_next_delayed_flow); - q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; - q->unthrottle_latency_ns += sample >> 3; + if ((long)sample > 0) { + q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; + q->unthrottle_latency_ns += sample >> 3; + } + now += q->offload_horizon; q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { @@ -687,7 +692,7 @@ begin: u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send, f->time_next_packet); - if (now < time_next_packet) { + if (now + q->offload_horizon < time_next_packet) { head->first = f->next; f->time_next_packet = time_next_packet; fq_flow_set_throttled(q, f); @@ -925,6 +930,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), + [TCA_FQ_OFFLOAD_HORIZON] = { .type = NLA_U32 }, }; /* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ @@ -1100,6 +1106,17 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, WRITE_ONCE(q->horizon_drop, nla_get_u8(tb[TCA_FQ_HORIZON_DROP])); + if (tb[TCA_FQ_OFFLOAD_HORIZON]) { + u64 offload_horizon = (u64)NSEC_PER_USEC * + nla_get_u32(tb[TCA_FQ_OFFLOAD_HORIZON]); + + if (offload_horizon <= qdisc_dev(sch)->max_pacing_offload_horizon) { + WRITE_ONCE(q->offload_horizon, offload_horizon); + } else { + NL_SET_ERR_MSG_MOD(extack, "invalid offload_horizon"); + err = -EINVAL; + } + } if (!err) { sch_tree_unlock(sch); @@ -1183,6 +1200,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) .bands = FQ_BANDS, }; struct nlattr *opts; + u64 offload_horizon; u64 ce_threshold; s32 weights[3]; u64 horizon; @@ -1199,6 +1217,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) horizon = READ_ONCE(q->horizon); do_div(horizon, NSEC_PER_USEC); + offload_horizon = READ_ONCE(q->offload_horizon); + do_div(offload_horizon, NSEC_PER_USEC); + if (nla_put_u32(skb, TCA_FQ_PLIMIT, READ_ONCE(sch->limit)) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, @@ -1224,6 +1245,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_TIMER_SLACK, READ_ONCE(q->timer_slack)) || nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) || + nla_put_u32(skb, TCA_FQ_OFFLOAD_HORIZON, (u32)offload_horizon) || nla_put_u8(skb, TCA_FQ_HORIZON_DROP, READ_ONCE(q->horizon_drop))) goto nla_put_failure; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 79ba9dc70254..7d2151c62c4a 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -668,7 +668,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } - max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; + max_P = nla_get_u32_default(tb[TCA_GRED_MAX_P], 0); ctl = nla_data(tb[TCA_GRED_PARMS]); stab = nla_data(tb[TCA_GRED_STAB]); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index ff3de37874e4..c31bc5489bdd 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1810,8 +1810,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], NULL)); - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; - ceil64 = tb[TCA_HTB_CEIL64] ? 
nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + rate64 = nla_get_u64_default(tb[TCA_HTB_RATE64], 0); + ceil64 = nla_get_u64_default(tb[TCA_HTB_CEIL64], 0); if (!cl) { /* new class */ struct net_device *dev = qdisc_dev(sch); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d584c0c25899..6a07cdbdb9e1 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -421,10 +421,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (err < 0) return err; - if (tb[TCA_QFQ_WEIGHT]) - weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); - else - weight = 1; + weight = nla_get_u32_default(tb[TCA_QFQ_WEIGHT], 1); if (tb[TCA_QFQ_LMAX]) { lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index b5f096588fae..6029bc29b51e 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -248,7 +248,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, tb[TCA_RED_STAB] == NULL) return -EINVAL; - max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; + max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0); ctl = nla_data(tb[TCA_RED_PARMS]); stab = nla_data(tb[TCA_RED_STAB]); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3b9245a3c767..a4b8296a2fa1 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -77,12 +77,6 @@ #define SFQ_EMPTY_SLOT 0xffff #define SFQ_DEFAULT_HASH_DIVISOR 1024 -/* We use 16 bits to store allot, and want to handle packets up to 64K - * Scale allot by 8 (1<<3) so that no overflow occurs. - */ -#define SFQ_ALLOT_SHIFT 3 -#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) - /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ typedef u16 sfq_index; @@ -104,7 +98,7 @@ struct sfq_slot { sfq_index next; /* next slot in sfq RR chain */ struct sfq_head dep; /* anchor in dep[] chains */ unsigned short hash; /* hash value (index in ht[]) */ - short allot; /* credit for this slot */ + int allot; /* credit for this slot */ unsigned int backlog; struct red_vars vars; @@ -120,7 +114,6 @@ struct sfq_sched_data { siphash_key_t perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; - unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct tcf_proto __rcu *filter_list; struct tcf_block *block; sfq_index *ht; /* Hash table ('divisor' slots) */ @@ -456,7 +449,7 @@ enqueue: */ q->tail = slot; /* We could use a bigger initial quantum for new flows */ - slot->allot = q->scaled_quantum; + slot->allot = q->quantum; } if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; @@ -493,7 +486,7 @@ next_slot: slot = &q->slots[a]; if (slot->allot <= 0) { q->tail = slot; - slot->allot += q->scaled_quantum; + slot->allot += q->quantum; goto next_slot; } skb = slot_dequeue_head(slot); @@ -512,7 +505,7 @@ next_slot: } q->tail->next = next_a; } else { - slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); + slot->allot -= qdisc_pkt_len(skb); } return skb; } @@ -595,7 +588,7 @@ drop: q->tail->next = x; } q->tail = slot; - slot->allot = q->scaled_quantum; + slot->allot = q->quantum; } } sch->q.qlen -= dropped; @@ -628,7 +621,8 @@ static void sfq_perturbation(struct timer_list *t) rcu_read_unlock(); } -static int sfq_change(struct Qdisc *sch, struct nlattr *opt) +static int sfq_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = nla_data(opt); @@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) (!is_power_of_2(ctl->divisor) || ctl->divisor 
> 65536)) return -EINVAL; - /* slot->allot is a short, make sure quantum is not too big. */ - if (ctl->quantum) { - unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); - - if (scaled <= 0 || scaled > SHRT_MAX) - return -EINVAL; + if ((int)ctl->quantum < 0) { + NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); + return -EINVAL; } - if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) return -EINVAL; @@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) return -ENOMEM; } sch_tree_lock(sch); - if (ctl->quantum) { + if (ctl->quantum) q->quantum = ctl->quantum; - q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); - } WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); if (ctl->flows) q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); @@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt, q->divisor = SFQ_DEFAULT_HASH_DIVISOR; q->maxflows = SFQ_DEFAULT_FLOWS; q->quantum = psched_mtu(qdisc_dev(sch)); - q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; get_random_bytes(&q->perturbation, sizeof(q->perturbation)); if (opt) { - int err = sfq_change(sch, opt); + int err = sfq_change(sch, opt, extack); if (err) return err; } @@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; - xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; + xstats.allot = slot->allot; qs.qlen = slot->qlen; qs.backlog = slot->backlog; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8623dc0bafc0..a68e17891b0b 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1828,7 +1828,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, * zero; (2) the 'flags' of a "running" taprio instance cannot be * changed. */ - taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0; + taprio_flags = nla_get_u32_default(tb[TCA_TAPRIO_ATTR_FLAGS], 0); /* txtime-assist and full offload are mutually exclusive */ if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 38e2fbdcbeac..a9ed2ccab1bd 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -103,10 +103,10 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr) && addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { - sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); break; } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 39ca5403d4d7..8b9a1b96695e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -738,6 +738,20 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm */ spin_lock_bh(&net->sctp.addr_wq_lock); + + /* Avoid searching the queue or modifying it if there are no consumers, + * as it can lead to performance degradation if addresses are modified + * en-masse. + * + * If the queue already contains some events, update it anyway to avoid + * ugly races between new sessions and new address events. 
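
Stepping back to the sch_sfq conversion earlier in this series: with the
3-bit allot scaling removed, the scheduler is a plain byte-based deficit
round-robin. A condensed sketch of that credit loop under the same
assumptions (next_active_slot() is a hypothetical stand-in for the
slot-chain walk):

for (;;) {
	slot = next_active_slot(q);
	if (slot->allot <= 0) {
		/* Out of credit: top up one quantum and rotate. */
		slot->allot += q->quantum;
		continue;
	}
	skb = slot_dequeue_head(slot);
	/* Pay for the packet in plain bytes, no scaling. */
	slot->allot -= qdisc_pkt_len(skb);
	return skb;
}
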
+ */ + if (list_empty(&net->sctp.auto_asconf_splist) && + list_empty(&net->sctp.addr_waitq)) { + spin_unlock_bh(&net->sctp.addr_wq_lock); + return; + } + /* Offsets existing events in addr_wq */ addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { @@ -808,10 +822,10 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); break; } } diff --git a/net/shaper/Makefile b/net/shaper/Makefile new file mode 100644 index 000000000000..54af7169a331 --- /dev/null +++ b/net/shaper/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the net shaper infrastructure. +# +# Copyright (c) 2024, Red Hat, Inc. +# + +obj-y += shaper.o shaper_nl_gen.o diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c new file mode 100644 index 000000000000..15463062fe7b --- /dev/null +++ b/net/shaper/shaper.c @@ -0,0 +1,1438 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/bits.h> +#include <linux/bitfield.h> +#include <linux/idr.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/skbuff.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include <net/net_shaper.h> + +#include "shaper_nl_gen.h" + +#include "../core/dev.h" + +#define NET_SHAPER_SCOPE_SHIFT 26 +#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0) +#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT) + +#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK + +struct net_shaper_hierarchy { + struct xarray shapers; +}; + +struct net_shaper_nl_ctx { + struct net_shaper_binding binding; + netdevice_tracker dev_tracker; + unsigned long start_index; +}; + +static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx) +{ + return &((struct net_shaper_nl_ctx *)ctx)->binding; +} + +static void net_shaper_lock(struct net_shaper_binding *binding) +{ + switch (binding->type) { + case NET_SHAPER_BINDING_TYPE_NETDEV: + mutex_lock(&binding->netdev->lock); + break; + } +} + +static void net_shaper_unlock(struct net_shaper_binding *binding) +{ + switch (binding->type) { + case NET_SHAPER_BINDING_TYPE_NETDEV: + mutex_unlock(&binding->netdev->lock); + break; + } +} + +static struct net_shaper_hierarchy * +net_shaper_hierarchy(struct net_shaper_binding *binding) +{ + /* Pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. */ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + return READ_ONCE(binding->netdev->net_shaper_hierarchy); + + /* No other type supported yet. */ + return NULL; +} + +static const struct net_shaper_ops * +net_shaper_ops(struct net_shaper_binding *binding) +{ + if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV) + return binding->netdev->netdev_ops->net_shaper_ops; + + /* No other type supported yet. */ + return NULL; +} + +/* Count the number of [multi] attributes of the given type. 
*/ +static int net_shaper_list_len(struct genl_info *info, int type) +{ + struct nlattr *attr; + int rem, cnt = 0; + + nla_for_each_attr_type(attr, type, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) + cnt++; + return cnt; +} + +static int net_shaper_handle_size(void) +{ + return nla_total_size(nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(u32))); +} + +static int net_shaper_fill_binding(struct sk_buff *msg, + const struct net_shaper_binding *binding, + u32 type) +{ + /* Should never happen, as currently only NETDEV is supported. */ + if (WARN_ON_ONCE(binding->type != NET_SHAPER_BINDING_TYPE_NETDEV)) + return -EINVAL; + + if (nla_put_u32(msg, type, binding->netdev->ifindex)) + return -EMSGSIZE; + + return 0; +} + +static int net_shaper_fill_handle(struct sk_buff *msg, + const struct net_shaper_handle *handle, + u32 type) +{ + struct nlattr *handle_attr; + + if (handle->scope == NET_SHAPER_SCOPE_UNSPEC) + return 0; + + handle_attr = nla_nest_start(msg, type); + if (!handle_attr) + return -EMSGSIZE; + + if (nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE, handle->scope) || + (handle->scope >= NET_SHAPER_SCOPE_QUEUE && + nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, handle->id))) + goto handle_nest_cancel; + + nla_nest_end(msg, handle_attr); + return 0; + +handle_nest_cancel: + nla_nest_cancel(msg, handle_attr); + return -EMSGSIZE; +} + +static int +net_shaper_fill_one(struct sk_buff *msg, + const struct net_shaper_binding *binding, + const struct net_shaper *shaper, + const struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + return -EMSGSIZE; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || + net_shaper_fill_handle(msg, &shaper->parent, + NET_SHAPER_A_PARENT) || + net_shaper_fill_handle(msg, &shaper->handle, + NET_SHAPER_A_HANDLE) || + ((shaper->bw_min || shaper->bw_max || shaper->burst) && + nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) || + (shaper->bw_min && + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) || + (shaper->bw_max && + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) || + (shaper->burst && + nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) || + (shaper->priority && + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) || + (shaper->weight && + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/* Initialize the context fetching the relevant device and + * acquiring a reference to it. 
+ */ +static int net_shaper_ctx_setup(const struct genl_info *info, int type, + struct net_shaper_nl_ctx *ctx) +{ + struct net *ns = genl_info_net(info); + struct net_device *dev; + int ifindex; + + if (GENL_REQ_ATTR_CHECK(info, type)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[type]); + dev = netdev_get_by_index(ns, ifindex, &ctx->dev_tracker, GFP_KERNEL); + if (!dev) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + return -ENOENT; + } + + if (!dev->netdev_ops->net_shaper_ops) { + NL_SET_BAD_ATTR(info->extack, info->attrs[type]); + netdev_put(dev, &ctx->dev_tracker); + return -EOPNOTSUPP; + } + + ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + ctx->binding.netdev = dev; + return 0; +} + +static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx) +{ + if (ctx->binding.type == NET_SHAPER_BINDING_TYPE_NETDEV) + netdev_put(ctx->binding.netdev, &ctx->dev_tracker); +} + +static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle) +{ + return FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope) | + FIELD_PREP(NET_SHAPER_ID_MASK, handle->id); +} + +static void net_shaper_index_to_handle(u32 index, + struct net_shaper_handle *handle) +{ + handle->scope = FIELD_GET(NET_SHAPER_SCOPE_MASK, index); + handle->id = FIELD_GET(NET_SHAPER_ID_MASK, index); +} + +static void net_shaper_default_parent(const struct net_shaper_handle *handle, + struct net_shaper_handle *parent) +{ + switch (handle->scope) { + case NET_SHAPER_SCOPE_UNSPEC: + case NET_SHAPER_SCOPE_NETDEV: + case __NET_SHAPER_SCOPE_MAX: + parent->scope = NET_SHAPER_SCOPE_UNSPEC; + break; + + case NET_SHAPER_SCOPE_QUEUE: + case NET_SHAPER_SCOPE_NODE: + parent->scope = NET_SHAPER_SCOPE_NETDEV; + break; + } + parent->id = 0; +} + +/* + * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as + * it's cleared by xa_store(). + */ +#define NET_SHAPER_NOT_VALID XA_MARK_1 + +static struct net_shaper * +net_shaper_lookup(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + u32 index = net_shaper_handle_to_index(handle); + + if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, + NET_SHAPER_NOT_VALID)) + return NULL; + + return xa_load(&hierarchy->shapers, index); +} + +/* Allocate on demand the per device shaper's hierarchy container. + * Called under the net shaper lock + */ +static struct net_shaper_hierarchy * +net_shaper_hierarchy_setup(struct net_shaper_binding *binding) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + + if (hierarchy) + return hierarchy; + + hierarchy = kmalloc(sizeof(*hierarchy), GFP_KERNEL); + if (!hierarchy) + return NULL; + + /* The flag is required for ID allocation */ + xa_init_flags(&hierarchy->shapers, XA_FLAGS_ALLOC); + + switch (binding->type) { + case NET_SHAPER_BINDING_TYPE_NETDEV: + /* Pairs with READ_ONCE in net_shaper_hierarchy. */ + WRITE_ONCE(binding->netdev->net_shaper_hierarchy, hierarchy); + break; + } + return hierarchy; +} + +/* Prepare the hierarchy container to actually insert the given shaper, doing + * in advance the needed allocations. 
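
A quick round-trip through the scope/id packing defined above: with
NET_SHAPER_SCOPE_SHIFT == 26, the scope occupies bits 31..26 of the xarray
index and the id bits 25..0, so a whole handle fits one u32. A small usage
sketch built only from the helpers above:

struct net_shaper_handle h = { .scope = NET_SHAPER_SCOPE_QUEUE, .id = 5 };
struct net_shaper_handle back;
u32 index;

index = net_shaper_handle_to_index(&h);	/* (QUEUE scope << 26) | 5 */
net_shaper_index_to_handle(index, &back);
/* back.scope == h.scope and back.id == h.id */
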
+ */ +static int net_shaper_pre_insert(struct net_shaper_binding *binding, + struct net_shaper_handle *handle, + struct netlink_ext_ack *extack) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *prev, *cur; + bool id_allocated = false; + int ret, index; + + if (!hierarchy) + return -ENOMEM; + + index = net_shaper_handle_to_index(handle); + cur = xa_load(&hierarchy->shapers, index); + if (cur) + return 0; + + /* Allocated a new id, if needed. */ + if (handle->scope == NET_SHAPER_SCOPE_NODE && + handle->id == NET_SHAPER_ID_UNSPEC) { + u32 min, max; + + handle->id = NET_SHAPER_ID_MASK - 1; + max = net_shaper_handle_to_index(handle); + handle->id = 0; + min = net_shaper_handle_to_index(handle); + + ret = xa_alloc(&hierarchy->shapers, &index, NULL, + XA_LIMIT(min, max), GFP_KERNEL); + if (ret < 0) { + NL_SET_ERR_MSG(extack, "Can't allocate new id for NODE shaper"); + return ret; + } + + net_shaper_index_to_handle(index, handle); + id_allocated = true; + } + + cur = kzalloc(sizeof(*cur), GFP_KERNEL); + if (!cur) { + ret = -ENOMEM; + goto free_id; + } + + /* Mark 'tentative' shaper inside the hierarchy container. + * xa_set_mark is a no-op if the previous store fails. + */ + xa_lock(&hierarchy->shapers); + prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); + __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID); + xa_unlock(&hierarchy->shapers); + if (xa_err(prev)) { + NL_SET_ERR_MSG(extack, "Can't insert shaper into device store"); + kfree_rcu(cur, rcu); + ret = xa_err(prev); + goto free_id; + } + return 0; + +free_id: + if (id_allocated) + xa_erase(&hierarchy->shapers, index); + return ret; +} + +/* Commit the tentative insert with the actual values. + * Must be called only after a successful net_shaper_pre_insert(). + */ +static void net_shaper_commit(struct net_shaper_binding *binding, + int nr_shapers, const struct net_shaper *shapers) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur; + int index; + int i; + + xa_lock(&hierarchy->shapers); + for (i = 0; i < nr_shapers; ++i) { + index = net_shaper_handle_to_index(&shapers[i].handle); + + cur = xa_load(&hierarchy->shapers, index); + if (WARN_ON_ONCE(!cur)) + continue; + + /* Successful update: drop the tentative mark + * and update the hierarchy container. + */ + __xa_clear_mark(&hierarchy->shapers, index, + NET_SHAPER_NOT_VALID); + *cur = shapers[i]; + } + xa_unlock(&hierarchy->shapers); +} + +/* Rollback all the tentative inserts from the hierarchy. 
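
The insert path above is deliberately two-phase: net_shaper_pre_insert()
claims the xarray slot (and, for NODE scope, the id) with the
NET_SHAPER_NOT_VALID mark set, so lookups treat it as absent; only after
the driver accepts the change is the mark cleared. A condensed view of the
lifecycle, following the set() path as it appears later in this file:

ret = net_shaper_pre_insert(binding, &handle, extack); /* tentative */
if (ret)
	return ret;

ret = ops->set(binding, &shaper, extack); /* device may still reject */
if (ret) {
	net_shaper_rollback(binding);	/* erase every marked entry */
	return ret;
}

net_shaper_commit(binding, 1, &shaper);	/* clear mark, store values */
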
*/ +static void net_shaper_rollback(struct net_shaper_binding *binding) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur; + unsigned long index; + + if (!hierarchy) + return; + + xa_lock(&hierarchy->shapers); + xa_for_each_marked(&hierarchy->shapers, index, cur, + NET_SHAPER_NOT_VALID) { + __xa_erase(&hierarchy->shapers, index); + kfree(cur); + } + xa_unlock(&hierarchy->shapers); +} + +static int net_shaper_parse_handle(const struct nlattr *attr, + const struct genl_info *info, + struct net_shaper_handle *handle) +{ + struct nlattr *tb[NET_SHAPER_A_HANDLE_MAX + 1]; + struct nlattr *id_attr; + u32 id = 0; + int ret; + + ret = nla_parse_nested(tb, NET_SHAPER_A_HANDLE_MAX, attr, + net_shaper_handle_nl_policy, info->extack); + if (ret < 0) + return ret; + + if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, + NET_SHAPER_A_HANDLE_SCOPE)) + return -EINVAL; + + handle->scope = nla_get_u32(tb[NET_SHAPER_A_HANDLE_SCOPE]); + + /* The default id for NODE scope shapers is an invalid one + * to help the 'group' operation discriminate between new + * NODE shaper creation (ID_UNSPEC) and reuse of existing + * shaper (any other value). + */ + id_attr = tb[NET_SHAPER_A_HANDLE_ID]; + if (id_attr) + id = nla_get_u32(id_attr); + else if (handle->scope == NET_SHAPER_SCOPE_NODE) + id = NET_SHAPER_ID_UNSPEC; + + handle->id = id; + return 0; +} + +static int net_shaper_validate_caps(struct net_shaper_binding *binding, + struct nlattr **tb, + const struct genl_info *info, + struct net_shaper *shaper) +{ + const struct net_shaper_ops *ops = net_shaper_ops(binding); + struct nlattr *bad = NULL; + unsigned long caps = 0; + + ops->capabilities(binding, shaper->handle.scope, &caps); + + if (tb[NET_SHAPER_A_PRIORITY] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_PRIORITY))) + bad = tb[NET_SHAPER_A_PRIORITY]; + if (tb[NET_SHAPER_A_WEIGHT] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_WEIGHT))) + bad = tb[NET_SHAPER_A_WEIGHT]; + if (tb[NET_SHAPER_A_BW_MIN] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MIN))) + bad = tb[NET_SHAPER_A_BW_MIN]; + if (tb[NET_SHAPER_A_BW_MAX] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX))) + bad = tb[NET_SHAPER_A_BW_MAX]; + if (tb[NET_SHAPER_A_BURST] && + !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BURST))) + bad = tb[NET_SHAPER_A_BURST]; + + if (!caps) + bad = tb[NET_SHAPER_A_HANDLE]; + + if (bad) { + NL_SET_BAD_ATTR(info->extack, bad); + return -EOPNOTSUPP; + } + + if (shaper->handle.scope == NET_SHAPER_SCOPE_QUEUE && + binding->type == NET_SHAPER_BINDING_TYPE_NETDEV && + shaper->handle.id >= binding->netdev->real_num_tx_queues) { + NL_SET_ERR_MSG_FMT(info->extack, + "Not existing queue id %d max %d", + shaper->handle.id, + binding->netdev->real_num_tx_queues); + return -ENOENT; + } + + /* The metric is really used only if there is *any* rate-related + * setting, either in current attributes set or in pre-existing + * values. + */ + if (shaper->burst || shaper->bw_min || shaper->bw_max) { + u32 metric_cap = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS + + shaper->metric; + + /* The metric test can fail even when the user did not + * specify the METRIC attribute. Pointing to rate related + * attribute will be confusing, as the attribute itself + * could be indeed supported, with a different metric. + * Be more specific. 
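
The checks in net_shaper_validate_caps() above reject any attribute whose
NET_SHAPER_A_CAPS_SUPPORT_* bit the driver did not report. A hedged sketch
of the driver-side half, with a hypothetical "foo" driver that only
supports bps-metric max-rate shaping on queues (the hook signature is
inferred from the ops->capabilities() call sites in this file):

static void foo_shaper_capabilities(struct net_shaper_binding *binding,
				    enum net_shaper_scope scope,
				    unsigned long *flags)
{
	if (scope != NET_SHAPER_SCOPE_QUEUE)
		return;	/* *flags stays 0: scope unsupported */

	*flags = BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS) |
		 BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX);
}
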
+ */ + if (!(caps & BIT(metric_cap))) { + NL_SET_ERR_MSG_FMT(info->extack, "Bad metric %d", + shaper->metric); + return -EOPNOTSUPP; + } + } + return 0; +} + +static int net_shaper_parse_info(struct net_shaper_binding *binding, + struct nlattr **tb, + const struct genl_info *info, + struct net_shaper *shaper, + bool *exists) +{ + struct net_shaper *old; + int ret; + + /* The shaper handle is the only mandatory attribute. */ + if (NL_REQ_ATTR_CHECK(info->extack, NULL, tb, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + ret = net_shaper_parse_handle(tb[NET_SHAPER_A_HANDLE], info, + &shaper->handle); + if (ret) + return ret; + + if (shaper->handle.scope == NET_SHAPER_SCOPE_UNSPEC) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]); + return -EINVAL; + } + + /* Fetch existing hierarchy, if any, so that user provide info will + * incrementally update the existing shaper configuration. + */ + old = net_shaper_lookup(binding, &shaper->handle); + if (old) + *shaper = *old; + *exists = !!old; + + if (tb[NET_SHAPER_A_METRIC]) + shaper->metric = nla_get_u32(tb[NET_SHAPER_A_METRIC]); + + if (tb[NET_SHAPER_A_BW_MIN]) + shaper->bw_min = nla_get_uint(tb[NET_SHAPER_A_BW_MIN]); + + if (tb[NET_SHAPER_A_BW_MAX]) + shaper->bw_max = nla_get_uint(tb[NET_SHAPER_A_BW_MAX]); + + if (tb[NET_SHAPER_A_BURST]) + shaper->burst = nla_get_uint(tb[NET_SHAPER_A_BURST]); + + if (tb[NET_SHAPER_A_PRIORITY]) + shaper->priority = nla_get_u32(tb[NET_SHAPER_A_PRIORITY]); + + if (tb[NET_SHAPER_A_WEIGHT]) + shaper->weight = nla_get_u32(tb[NET_SHAPER_A_WEIGHT]); + + ret = net_shaper_validate_caps(binding, tb, info, shaper); + if (ret < 0) + return ret; + + return 0; +} + +static int net_shaper_validate_nesting(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + const struct net_shaper_ops *ops = net_shaper_ops(binding); + unsigned long caps = 0; + + ops->capabilities(binding, shaper->handle.scope, &caps); + if (!(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_NESTING))) { + NL_SET_ERR_MSG_FMT(extack, + "Nesting not supported for scope %d", + shaper->handle.scope); + return -EOPNOTSUPP; + } + return 0; +} + +/* Fetch the existing leaf and update it with the user-provided + * attributes. + */ +static int net_shaper_parse_leaf(struct net_shaper_binding *binding, + const struct nlattr *attr, + const struct genl_info *info, + const struct net_shaper *node, + struct net_shaper *shaper) +{ + struct nlattr *tb[NET_SHAPER_A_WEIGHT + 1]; + bool exists; + int ret; + + ret = nla_parse_nested(tb, NET_SHAPER_A_WEIGHT, attr, + net_shaper_leaf_info_nl_policy, info->extack); + if (ret < 0) + return ret; + + ret = net_shaper_parse_info(binding, tb, info, shaper, &exists); + if (ret < 0) + return ret; + + if (shaper->handle.scope != NET_SHAPER_SCOPE_QUEUE) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]); + return -EINVAL; + } + + if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + ret = net_shaper_validate_nesting(binding, shaper, + info->extack); + if (ret < 0) + return ret; + } + + if (!exists) + net_shaper_default_parent(&shaper->handle, &shaper->parent); + return 0; +} + +/* Alike net_parse_shaper_info(), but additionally allow the user specifying + * the shaper's parent handle. 
+ */ +static int net_shaper_parse_node(struct net_shaper_binding *binding, + struct nlattr **tb, + const struct genl_info *info, + struct net_shaper *shaper) +{ + bool exists; + int ret; + + ret = net_shaper_parse_info(binding, tb, info, shaper, &exists); + if (ret) + return ret; + + if (shaper->handle.scope != NET_SHAPER_SCOPE_NODE && + shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]); + return -EINVAL; + } + + if (tb[NET_SHAPER_A_PARENT]) { + ret = net_shaper_parse_handle(tb[NET_SHAPER_A_PARENT], info, + &shaper->parent); + if (ret) + return ret; + + if (shaper->parent.scope != NET_SHAPER_SCOPE_NODE && + shaper->parent.scope != NET_SHAPER_SCOPE_NETDEV) { + NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_PARENT]); + return -EINVAL; + } + } + return 0; +} + +static int net_shaper_generic_pre(struct genl_info *info, int type) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)info->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(info->ctx)); + + return net_shaper_ctx_setup(info, type, ctx); +} + +int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return net_shaper_generic_pre(info, NET_SHAPER_A_IFINDEX); +} + +static void net_shaper_generic_post(struct genl_info *info) +{ + net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)info->ctx); +} + +void net_shaper_nl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + net_shaper_generic_post(info); +} + +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + + return net_shaper_ctx_setup(info, NET_SHAPER_A_IFINDEX, ctx); +} + +int net_shaper_nl_post_dumpit(struct netlink_callback *cb) +{ + net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)cb->ctx); + return 0; +} + +int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return net_shaper_generic_pre(info, NET_SHAPER_A_CAPS_IFINDEX); +} + +void net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + net_shaper_generic_post(info); +} + +int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + + return net_shaper_ctx_setup(genl_info_dump(cb), + NET_SHAPER_A_CAPS_IFINDEX, ctx); +} + +int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + + net_shaper_ctx_cleanup(ctx); + return 0; +} + +int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_binding *binding; + struct net_shaper_handle handle; + struct net_shaper *shaper; + struct sk_buff *msg; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, + &handle); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rcu_read_lock(); + shaper = net_shaper_lookup(binding, &handle); + if (!shaper) { + NL_SET_BAD_ATTR(info->extack, + info->attrs[NET_SHAPER_A_HANDLE]); + rcu_read_unlock(); + ret = -ENOENT; + goto free_msg; + } + + ret = net_shaper_fill_one(msg, binding, shaper, info); + rcu_read_unlock(); + if (ret) + goto 
free_msg; + + ret = genlmsg_reply(msg, info); + if (ret) + goto free_msg; + + return 0; + +free_msg: + nlmsg_free(msg); + return ret; +} + +int net_shaper_nl_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + struct net_shaper *shaper; + int ret = 0; + + /* Don't error out dumps performed before any set operation. */ + binding = net_shaper_binding_from_ctx(ctx); + hierarchy = net_shaper_hierarchy(binding); + if (!hierarchy) + return 0; + + rcu_read_lock(); + for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, + U32_MAX, XA_PRESENT)); ctx->start_index++) { + ret = net_shaper_fill_one(skb, binding, shaper, info); + if (ret) + break; + } + rcu_read_unlock(); + + return ret; +} + +int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + const struct net_shaper_ops *ops; + struct net_shaper_handle handle; + struct net_shaper shaper = {}; + bool exists; + int ret; + + binding = net_shaper_binding_from_ctx(info->ctx); + + net_shaper_lock(binding); + ret = net_shaper_parse_info(binding, info->attrs, info, &shaper, + &exists); + if (ret) + goto unlock; + + if (!exists) + net_shaper_default_parent(&shaper.handle, &shaper.parent); + + hierarchy = net_shaper_hierarchy_setup(binding); + if (!hierarchy) { + ret = -ENOMEM; + goto unlock; + } + + /* The 'set' operation can't create node-scope shapers. */ + handle = shaper.handle; + if (handle.scope == NET_SHAPER_SCOPE_NODE && + !net_shaper_lookup(binding, &handle)) { + ret = -ENOENT; + goto unlock; + } + + ret = net_shaper_pre_insert(binding, &handle, info->extack); + if (ret) + goto unlock; + + ops = net_shaper_ops(binding); + ret = ops->set(binding, &shaper, info->extack); + if (ret) { + net_shaper_rollback(binding); + goto unlock; + } + + net_shaper_commit(binding, 1, &shaper); + +unlock: + net_shaper_unlock(binding); + return ret; +} + +static int __net_shaper_delete(struct net_shaper_binding *binding, + struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper_handle parent_handle, handle = shaper->handle; + const struct net_shaper_ops *ops = net_shaper_ops(binding); + int ret; + +again: + parent_handle = shaper->parent; + + ret = ops->delete(binding, &handle, extack); + if (ret < 0) + return ret; + + xa_erase(&hierarchy->shapers, net_shaper_handle_to_index(&handle)); + kfree_rcu(shaper, rcu); + + /* Eventually delete the parent, if it is left over with no leaves. */ + if (parent_handle.scope == NET_SHAPER_SCOPE_NODE) { + shaper = net_shaper_lookup(binding, &parent_handle); + if (shaper && !--shaper->leaves) { + handle = parent_handle; + goto again; + } + } + return 0; +} + +static int net_shaper_handle_cmp(const struct net_shaper_handle *a, + const struct net_shaper_handle *b) +{ + /* Must avoid holes in struct net_shaper_handle. 
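
On the assertion just below: memcmp() over a struct is only reliable when
every byte is defined, and two u32-sized fields pack to exactly 8 bytes
with no padding, which the BUILD_BUG_ON() verifies at compile time. The
assumed layout (hedged; field types inferred from their u32 netlink
attributes):

struct net_shaper_handle {
	enum net_shaper_scope scope;	/* 4 bytes */
	u32 id;				/* 4 bytes */
};
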
*/ + BUILD_BUG_ON(sizeof(*a) != 8); + + return memcmp(a, b, sizeof(*a)); +} + +static int net_shaper_parent_from_leaves(int leaves_count, + const struct net_shaper *leaves, + struct net_shaper *node, + struct netlink_ext_ack *extack) +{ + struct net_shaper_handle parent = leaves[0].parent; + int i; + + for (i = 1; i < leaves_count; ++i) { + if (net_shaper_handle_cmp(&leaves[i].parent, &parent)) { + NL_SET_ERR_MSG_FMT(extack, "All the leaves shapers must have the same old parent"); + return -EINVAL; + } + } + + node->parent = parent; + return 0; +} + +static int __net_shaper_group(struct net_shaper_binding *binding, + bool update_node, int leaves_count, + struct net_shaper *leaves, + struct net_shaper *node, + struct netlink_ext_ack *extack) +{ + const struct net_shaper_ops *ops = net_shaper_ops(binding); + struct net_shaper_handle leaf_handle; + struct net_shaper *parent = NULL; + bool new_node = false; + int i, ret; + + if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + new_node = node->handle.id == NET_SHAPER_ID_UNSPEC; + + if (!new_node && !net_shaper_lookup(binding, &node->handle)) { + /* The related attribute is not available when + * reaching here from the delete() op. + */ + NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists", + node->handle.scope, node->handle.id); + return -ENOENT; + } + + /* When unspecified, the node parent scope is inherited from + * the leaves. + */ + if (node->parent.scope == NET_SHAPER_SCOPE_UNSPEC) { + ret = net_shaper_parent_from_leaves(leaves_count, + leaves, node, + extack); + if (ret) + return ret; + } + + } else { + net_shaper_default_parent(&node->handle, &node->parent); + } + + if (node->parent.scope == NET_SHAPER_SCOPE_NODE) { + parent = net_shaper_lookup(binding, &node->parent); + if (!parent) { + NL_SET_ERR_MSG_FMT(extack, "Node parent shaper %d:%d does not exists", + node->parent.scope, node->parent.id); + return -ENOENT; + } + + ret = net_shaper_validate_nesting(binding, node, extack); + if (ret < 0) + return ret; + } + + if (update_node) { + /* For newly created node scope shaper, the following will + * update the handle, due to id allocation. + */ + ret = net_shaper_pre_insert(binding, &node->handle, extack); + if (ret) + return ret; + } + + for (i = 0; i < leaves_count; ++i) { + leaf_handle = leaves[i].handle; + + ret = net_shaper_pre_insert(binding, &leaf_handle, extack); + if (ret) + goto rollback; + + if (!net_shaper_handle_cmp(&leaves[i].parent, &node->handle)) + continue; + + /* The leaves shapers will be nested to the node, update the + * linking accordingly. + */ + leaves[i].parent = node->handle; + node->leaves++; + } + + ret = ops->group(binding, leaves_count, leaves, node, extack); + if (ret < 0) + goto rollback; + + /* The node's parent gains a new leaf only when the node itself + * is created by this group operation + */ + if (new_node && parent) + parent->leaves++; + if (update_node) + net_shaper_commit(binding, 1, node); + net_shaper_commit(binding, leaves_count, leaves); + return 0; + +rollback: + net_shaper_rollback(binding); + return ret; +} + +static int net_shaper_pre_del_node(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur, *leaves, node = {}; + int ret, leaves_count = 0; + unsigned long index; + bool update_node; + + if (!shaper->leaves) + return 0; + + /* Fetch the new node information. 
+static int net_shaper_pre_del_node(struct net_shaper_binding *binding, + const struct net_shaper *shaper, + struct netlink_ext_ack *extack) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur, *leaves, node = {}; + int ret, leaves_count = 0; + unsigned long index; + bool update_node; + + if (!shaper->leaves) + return 0; + + /* Fetch the new node information. */ + node.handle = shaper->parent; + cur = net_shaper_lookup(binding, &node.handle); + if (cur) { + node = *cur; + } else { + /* A NODE-scope shaper can be nested only under the NETDEV-scope + * shaper without creating the latter; this check can fail only + * if the data is in an inconsistent state. + */ + if (WARN_ON_ONCE(node.handle.scope != NET_SHAPER_SCOPE_NETDEV)) + return -EINVAL; + } + + leaves = kcalloc(shaper->leaves, sizeof(struct net_shaper), + GFP_KERNEL); + if (!leaves) + return -ENOMEM; + + /* Build the leaves array. */ + xa_for_each(&hierarchy->shapers, index, cur) { + if (net_shaper_handle_cmp(&cur->parent, &shaper->handle)) + continue; + + if (WARN_ON_ONCE(leaves_count == shaper->leaves)) { + ret = -EINVAL; + goto free; + } + + leaves[leaves_count++] = *cur; + } + + /* When re-linking to the netdev shaper, avoid implicitly creating + * a new node, which would be surprising since the user is + * performing a delete operation. + */ + update_node = node.handle.scope != NET_SHAPER_SCOPE_NETDEV; + ret = __net_shaper_group(binding, update_node, leaves_count, + leaves, &node, extack); + +free: + kfree(leaves); + return ret; +} + +int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + struct net_shaper_handle handle; + struct net_shaper *shaper; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + + net_shaper_lock(binding); + ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info, + &handle); + if (ret) + goto unlock; + + hierarchy = net_shaper_hierarchy(binding); + if (!hierarchy) { + ret = -ENOENT; + goto unlock; + } + + shaper = net_shaper_lookup(binding, &handle); + if (!shaper) { + ret = -ENOENT; + goto unlock; + } + + if (handle.scope == NET_SHAPER_SCOPE_NODE) { + ret = net_shaper_pre_del_node(binding, shaper, info->extack); + if (ret) + goto unlock; + } + + ret = __net_shaper_delete(binding, shaper, info->extack); + +unlock: + net_shaper_unlock(binding); + return ret; +} + +static int net_shaper_group_send_reply(struct net_shaper_binding *binding, + const struct net_shaper_handle *handle, + struct genl_info *info, + struct sk_buff *msg) +{ + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + goto free_msg; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || + net_shaper_fill_handle(msg, handle, NET_SHAPER_A_HANDLE)) + goto free_msg; + + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); + +free_msg: + /* Should never happen as msg is pre-allocated with enough space. */ + WARN_ONCE(true, "calculated message payload length (%d)", + net_shaper_handle_size()); + nlmsg_free(msg); + return -EMSGSIZE; +} +
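The group handler below sizes one kcalloc() to carry both the leaves array and the old_nodes pointer array, aliasing the latter past the last leaf. A stand-alone user-space sketch of that one-allocation, two-arrays idiom (hypothetical types, not the kernel structs); it is safe here because the element size keeps the trailing pointer array suitably aligned:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>

	struct shaper { uint32_t scope, id; };	/* 8 bytes, like the handle */

	int main(void)
	{
		int n = 4;
		struct shaper *leaves;
		struct shaper **old_nodes;

		/* One zeroed block: n structs followed by room for n pointers. */
		leaves = calloc(n, sizeof(*leaves) + sizeof(*old_nodes));
		if (!leaves)
			return 1;

		/* The pointer array starts right past the last struct. */
		old_nodes = (void *)&leaves[n];
		old_nodes[0] = &leaves[2];
		printf("aliased ok: %u\n", (unsigned)old_nodes[0]->id);

		free(leaves);
		return 0;
	}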
+int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper **old_nodes, *leaves, node = {}; + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding *binding; + int i, ret, rem, leaves_count; + int old_nodes_count = 0; + struct sk_buff *msg; + struct nlattr *attr; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + + /* The group operation is optional. */ + if (!net_shaper_ops(binding)->group) + return -EOPNOTSUPP; + + net_shaper_lock(binding); + leaves_count = net_shaper_list_len(info, NET_SHAPER_A_LEAVES); + if (!leaves_count) { + NL_SET_BAD_ATTR(info->extack, + info->attrs[NET_SHAPER_A_LEAVES]); + ret = -EINVAL; + goto unlock; + } + + leaves = kcalloc(leaves_count, sizeof(struct net_shaper) + + sizeof(struct net_shaper *), GFP_KERNEL); + if (!leaves) { + ret = -ENOMEM; + goto unlock; + } + old_nodes = (void *)&leaves[leaves_count]; + + ret = net_shaper_parse_node(binding, info->attrs, info, &node); + if (ret) + goto free_leaves; + + i = 0; + nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + if (WARN_ON_ONCE(i >= leaves_count)) + goto free_leaves; + + ret = net_shaper_parse_leaf(binding, attr, info, + &node, &leaves[i]); + if (ret) + goto free_leaves; + i++; + } + + /* Prepare the msg reply in advance, to avoid device operation + * rollback on allocation failure. + */ + msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto free_leaves; + } + + hierarchy = net_shaper_hierarchy_setup(binding); + if (!hierarchy) { + ret = -ENOMEM; + goto free_msg; + } + + /* Record the node shapers that this group() operation can make + * childless for later cleanup. + */ + for (i = 0; i < leaves_count; i++) { + if (leaves[i].parent.scope == NET_SHAPER_SCOPE_NODE && + net_shaper_handle_cmp(&leaves[i].parent, &node.handle)) { + struct net_shaper *tmp; + + tmp = net_shaper_lookup(binding, &leaves[i].parent); + if (!tmp) + continue; + + old_nodes[old_nodes_count++] = tmp; + } + } + + ret = __net_shaper_group(binding, true, leaves_count, leaves, &node, + info->extack); + if (ret) + goto free_msg; + + /* Check if we need to delete any node left childless by the new + * leaves linkage. + */ + for (i = 0; i < old_nodes_count; ++i) { + struct net_shaper *tmp = old_nodes[i]; + + if (--tmp->leaves > 0) + continue; + + /* Errors here are not fatal: the grouping operation is + * completed, and user-space can still explicitly clean up + * left-over nodes. 
+ */ + __net_shaper_delete(binding, tmp, info->extack); + } + + ret = net_shaper_group_send_reply(binding, &node.handle, info, msg); + if (ret) + GENL_SET_ERR_MSG_FMT(info, "Can't send reply"); + +free_leaves: + kfree(leaves); + +unlock: + net_shaper_unlock(binding); + return ret; + +free_msg: + kfree_skb(msg); + goto free_leaves; +} + +static int +net_shaper_cap_fill_one(struct sk_buff *msg, + struct net_shaper_binding *binding, + enum net_shaper_scope scope, unsigned long flags, + const struct genl_info *info) +{ + unsigned long cur; + void *hdr; + + hdr = genlmsg_iput(msg, info); + if (!hdr) + return -EMSGSIZE; + + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_CAPS_IFINDEX) || + nla_put_u32(msg, NET_SHAPER_A_CAPS_SCOPE, scope)) + goto nla_put_failure; + + for (cur = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS; + cur <= NET_SHAPER_A_CAPS_MAX; ++cur) { + if (flags & BIT(cur) && nla_put_flag(msg, cur)) + goto nla_put_failure; + } + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net_shaper_binding *binding; + const struct net_shaper_ops *ops; + enum net_shaper_scope scope; + unsigned long flags = 0; + struct sk_buff *msg; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_CAPS_SCOPE)) + return -EINVAL; + + binding = net_shaper_binding_from_ctx(info->ctx); + scope = nla_get_u32(info->attrs[NET_SHAPER_A_CAPS_SCOPE]); + ops = net_shaper_ops(binding); + ops->capabilities(binding, scope, &flags); + if (!flags) + return -EOPNOTSUPP; + + msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = net_shaper_cap_fill_one(msg, binding, scope, flags, info); + if (ret) + goto free_msg; + + ret = genlmsg_reply(msg, info); + if (ret) + goto free_msg; + return 0; + +free_msg: + nlmsg_free(msg); + return ret; +} + +int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct genl_info *info = genl_info_dump(cb); + struct net_shaper_binding *binding; + const struct net_shaper_ops *ops; + enum net_shaper_scope scope; + int ret; + + binding = net_shaper_binding_from_ctx(cb->ctx); + ops = net_shaper_ops(binding); + for (scope = 0; scope <= NET_SHAPER_SCOPE_MAX; ++scope) { + unsigned long flags = 0; + + ops->capabilities(binding, scope, &flags); + if (!flags) + continue; + + ret = net_shaper_cap_fill_one(skb, binding, scope, flags, + info); + if (ret) + return ret; + } + + return 0; +} + +static void net_shaper_flush(struct net_shaper_binding *binding) +{ + struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding); + struct net_shaper *cur; + unsigned long index; + + if (!hierarchy) + return; + + net_shaper_lock(binding); + xa_lock(&hierarchy->shapers); + xa_for_each(&hierarchy->shapers, index, cur) { + __xa_erase(&hierarchy->shapers, index); + kfree(cur); + } + xa_unlock(&hierarchy->shapers); + net_shaper_unlock(binding); + + kfree(hierarchy); +} + +void net_shaper_flush_netdev(struct net_device *dev) +{ + struct net_shaper_binding binding = { + .type = NET_SHAPER_BINDING_TYPE_NETDEV, + .netdev = dev, + }; + + net_shaper_flush(&binding); +} + +void net_shaper_set_real_num_tx_queues(struct net_device *dev, + unsigned int txq) +{ + struct net_shaper_hierarchy *hierarchy; + struct net_shaper_binding binding; + int i; + + binding.type = NET_SHAPER_BINDING_TYPE_NETDEV; + binding.netdev = dev; + hierarchy = net_shaper_hierarchy(&binding); + if (!hierarchy) + return; + + 
/* Only drivers implementing shaper support ensure that + * the lock is acquired in advance. + */ + lockdep_assert_held(&dev->lock); + + /* Take action only when decreasing the tx queue number. */ + for (i = txq; i < dev->real_num_tx_queues; ++i) { + struct net_shaper_handle handle, parent_handle; + struct net_shaper *shaper; + u32 index; + + handle.scope = NET_SHAPER_SCOPE_QUEUE; + handle.id = i; + shaper = net_shaper_lookup(&binding, &handle); + if (!shaper) + continue; + + /* Don't touch the H/W for the queue shaper; the driver has + * already deleted the queue and related resources. + */ + parent_handle = shaper->parent; + index = net_shaper_handle_to_index(&handle); + xa_erase(&hierarchy->shapers, index); + kfree_rcu(shaper, rcu); + + /* The recursion on the parent does the full job. */ + if (parent_handle.scope != NET_SHAPER_SCOPE_NODE) + continue; + + shaper = net_shaper_lookup(&binding, &parent_handle); + if (shaper && !--shaper->leaves) + __net_shaper_delete(&binding, shaper, NULL); + } +} + +static int __init shaper_init(void) +{ + return genl_register_family(&net_shaper_nl_family); +} + +subsys_initcall(shaper_init); diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c new file mode 100644 index 000000000000..204c8ae8c7b1 --- /dev/null +++ b/net/shaper/shaper_nl_gen.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN kernel source */ + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "shaper_nl_gen.h" + +#include <uapi/linux/net_shaper.h> + +/* Common nested types */ +const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = { + [NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), + [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, }, +}; + +const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = { + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_GET - do */ +static const struct nla_policy net_shaper_get_do_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), +}; + +/* NET_SHAPER_CMD_GET - dump */ +static const struct nla_policy net_shaper_get_dump_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_SET - do */ +static const struct nla_policy net_shaper_set_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1), + [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BURST] = { .type = NLA_UINT, }, + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, +}; + +/* NET_SHAPER_CMD_DELETE - do */ +static const struct nla_policy net_shaper_delete_nl_policy[NET_SHAPER_A_IFINDEX + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), +}; + +/* NET_SHAPER_CMD_GROUP - do */ +static const struct nla_policy net_shaper_group_nl_policy[NET_SHAPER_A_LEAVES + 1] = { + [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, }, +
[NET_SHAPER_A_PARENT] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy), + [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1), + [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, }, + [NET_SHAPER_A_BURST] = { .type = NLA_UINT, }, + [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, }, + [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, }, + [NET_SHAPER_A_LEAVES] = NLA_POLICY_NESTED(net_shaper_leaf_info_nl_policy), +}; + +/* NET_SHAPER_CMD_CAP_GET - do */ +static const struct nla_policy net_shaper_cap_get_do_nl_policy[NET_SHAPER_A_CAPS_SCOPE + 1] = { + [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, }, + [NET_SHAPER_A_CAPS_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), +}; + +/* NET_SHAPER_CMD_CAP_GET - dump */ +static const struct nla_policy net_shaper_cap_get_dump_nl_policy[NET_SHAPER_A_CAPS_IFINDEX + 1] = { + [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, }, +}; + +/* Ops table for net_shaper */ +static const struct genl_split_ops net_shaper_nl_ops[] = { + { + .cmd = NET_SHAPER_CMD_GET, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_get_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_get_do_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_GET, + .start = net_shaper_nl_pre_dumpit, + .dumpit = net_shaper_nl_get_dumpit, + .done = net_shaper_nl_post_dumpit, + .policy = net_shaper_get_dump_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, + { + .cmd = NET_SHAPER_CMD_SET, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_set_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_set_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_DELETE, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_delete_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_delete_nl_policy, + .maxattr = NET_SHAPER_A_IFINDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_GROUP, + .pre_doit = net_shaper_nl_pre_doit, + .doit = net_shaper_nl_group_doit, + .post_doit = net_shaper_nl_post_doit, + .policy = net_shaper_group_nl_policy, + .maxattr = NET_SHAPER_A_LEAVES, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_CAP_GET, + .pre_doit = net_shaper_nl_cap_pre_doit, + .doit = net_shaper_nl_cap_get_doit, + .post_doit = net_shaper_nl_cap_post_doit, + .policy = net_shaper_cap_get_do_nl_policy, + .maxattr = NET_SHAPER_A_CAPS_SCOPE, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NET_SHAPER_CMD_CAP_GET, + .start = net_shaper_nl_cap_pre_dumpit, + .dumpit = net_shaper_nl_cap_get_dumpit, + .done = net_shaper_nl_cap_post_dumpit, + .policy = net_shaper_cap_get_dump_nl_policy, + .maxattr = NET_SHAPER_A_CAPS_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, +}; + +struct genl_family net_shaper_nl_family __ro_after_init = { + .name = NET_SHAPER_FAMILY_NAME, + .version = NET_SHAPER_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = net_shaper_nl_ops, + .n_split_ops = ARRAY_SIZE(net_shaper_nl_ops), +}; diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h new file mode 100644 index 000000000000..cb7f9026fc23 --- /dev/null +++ b/net/shaper/shaper_nl_gen.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, 
auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_NET_SHAPER_GEN_H +#define _LINUX_NET_SHAPER_GEN_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <uapi/linux/net_shaper.h> + +/* Common nested types */ +extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1]; +extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1]; + +int net_shaper_nl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +void +net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +void +net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_pre_dumpit(struct netlink_callback *cb); +int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb); +int net_shaper_nl_post_dumpit(struct netlink_callback *cb); +int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb); + +int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info); +int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); + +extern struct genl_family net_shaper_nl_family; + +#endif /* _LINUX_NET_SHAPER_GEN_H */ diff --git a/net/smc/smc.h b/net/smc/smc.h index ad77d6b6b8d3..78ae10d06ed2 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -278,7 +278,7 @@ struct smc_connection { */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ - u8 freed : 1; /* normal termiation */ + u8 freed : 1; /* normal termination */ u8 out_of_sync : 1; /* out of sync with peer */ }; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 5625fda2960b..5fd6f5b8ef03 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -156,7 +156,7 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ } __aligned(4); struct smc_clc_msg_smcd { /* SMC-D GID information */ - struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */ + struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requester */ __be16 v2_ext_offset; /* SMC Version 2 Extension Offset */ u8 vendor_oui[3]; /* vendor organizationally unique identifier */ u8 vendor_exp_options[5]; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4e694860ece4..500952c2e67b 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -2321,7 +2321,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, } if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) goto out; - fallthrough; // try virtually continguous buf + fallthrough; // try virtually contiguous buf case SMCR_VIRT_CONT_BUFS: buf_desc->order = get_order(bufsize); buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 0db4e5f79ac4..69b54ecd6503 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -30,7 +30,7 @@ */ #define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections 
per link group used for * SMC-R v2.1 and later negotiation, vendors or - * distrubutions may modify it to a value between + * distributions may modify it to a value between * 16-255 as needed. */ @@ -181,7 +181,7 @@ struct smc_link { */ #define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for * SMC-R v2.1 and later negotiation, vendors or - * distrubutions may modify it to a value between + * distributions may modify it to a value between * 1-2 as needed. */ diff --git a/net/socket.c b/net/socket.c index 87a573a704c5..9a117248f18f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -669,7 +669,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) +void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; @@ -1558,9 +1558,11 @@ int __sock_create(struct net *net, int family, int type, int protocol, err = pf->create(net, sock, protocol, kern); if (err < 0) { /* ->create should release the allocated sock->sk object on error - * but it may leave the dangling pointer + * and make sure sock->sk is set to NULL to avoid use-after-free */ - sock->sk = NULL; + DEBUG_NET_WARN_ONCE(sock->sk, + "%ps must clear sock->sk on failure, family: %d, type: %d, protocol: %d\n", + pf->create, family, type, protocol); goto out_module_put; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 825ec5357691..b785425c3315 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1608,7 +1608,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct page_frag_cache *pfc = &svsk->sk_frag_cache; struct socket *sock = svsk->sk_sock; trace_svcsock_free(svsk, sock); @@ -1618,8 +1617,7 @@ static void svc_sock_free(struct svc_xprt *xprt) sockfd_put(sock); else sock_release(sock); - if (pfc->va) - __page_frag_cache_drain(virt_to_head_page(pfc->va), - pfc->pagecnt_bias); + + page_frag_cache_drain(&svsk->sk_frag_cache); kfree(svsk); } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index dfd29160fe11..25b28b1434f5 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2420,6 +2420,7 @@ static int vsock_create(struct net *net, struct socket *sock, if (sock->type == SOCK_DGRAM) { ret = vsock_assign_transport(vsk, NULL); if (ret < 0) { + sock->sk = NULL; sock_put(sk); return ret; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index e2157e387217..56c232cf5b0f 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -549,6 +549,7 @@ static void hvs_destruct(struct vsock_sock *vsk) vmbus_hvsock_device_unregister(chan); kfree(hvs); + vsk->trans = NULL; } static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 10345388ad13..2d67b5f2010e 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -11,9 +11,6 @@ config WEXT_PROC depends on PROC_FS depends on WEXT_CORE -config WEXT_SPY - bool - config WEXT_PRIV bool @@ -188,19 +185,12 @@ config CFG80211_CRDA_SUPPORT If unsure, say Y. config CFG80211_WEXT - bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT + bool "cfg80211 wireless extensions compatibility" select WEXT_CORE - default y if CFG80211_WEXT_EXPORT help Enable this option if you need old userspace for wireless extensions with cfg80211-based drivers. 
-config CFG80211_WEXT_EXPORT - bool - help - Drivers should select this option if they require cfg80211's - wext compatibility symbols to be exported. - config CFG80211_KUNIT_TEST tristate "KUnit tests for cfg80211" if !KUNIT_ALL_TESTS depends on KUNIT @@ -212,36 +202,3 @@ config CFG80211_KUNIT_TEST If unsure, say N. endif # CFG80211 - -config LIB80211 - tristate - default n - help - This options enables a library of common routines used - by IEEE802.11 wireless LAN drivers. - - Drivers should select this themselves if needed. - -config LIB80211_CRYPT_WEP - tristate - select CRYPTO_LIB_ARC4 - -config LIB80211_CRYPT_CCMP - tristate - select CRYPTO - select CRYPTO_AES - select CRYPTO_CCM - -config LIB80211_CRYPT_TKIP - tristate - select CRYPTO_LIB_ARC4 - -config LIB80211_DEBUG - bool "lib80211 debugging messages" - depends on LIB80211 - default n - help - You can enable this if you want verbose debugging messages - from lib80211. - - If unsure, say N. diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 1d49cc8b6da1..62a83faf0e07 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,14 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CFG80211) += cfg80211.o -obj-$(CONFIG_LIB80211) += lib80211.o -obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o -obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o -obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o obj-y += tests/ obj-$(CONFIG_WEXT_CORE) += wext-core.o obj-$(CONFIG_WEXT_PROC) += wext-proc.o -obj-$(CONFIG_WEXT_SPY) += wext-spy.o obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o diff --git a/net/wireless/chan.c b/net/wireless/chan.c index e579d7e1425f..40b6375a5de4 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -289,7 +289,7 @@ static bool cfg80211_valid_center_freq(u32 center, /* * Valid channels are packed from lowest frequency towards higher ones. - * So test that the lower frequency alignes with one of these steps. + * So test that the lower frequency aligns with one of these steps. 
*/ return (center - bw / 2 - 5945) % step == 0; } @@ -1628,6 +1628,7 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_reg_check_beaconing); int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) { if (!rdev->ops->set_monitor_channel) @@ -1635,7 +1636,7 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, if (!cfg80211_has_monitors_only(rdev)) return -EBUSY; - return rdev_set_monitor_channel(rdev, chandef); + return rdev_set_monitor_channel(rdev, dev, chandef); } bool cfg80211_any_usable_channels(struct wiphy *wiphy, diff --git a/net/wireless/core.c b/net/wireless/core.c index 74ca18833df1..afbdc549fb4a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -603,16 +603,20 @@ use_default_name: } EXPORT_SYMBOL(wiphy_new_nm); -static int wiphy_verify_combinations(struct wiphy *wiphy) +static +int wiphy_verify_iface_combinations(struct wiphy *wiphy, + const struct ieee80211_iface_combination *iface_comb, + int n_iface_comb, + bool combined_radio) { const struct ieee80211_iface_combination *c; int i, j; - for (i = 0; i < wiphy->n_iface_combinations; i++) { + for (i = 0; i < n_iface_comb; i++) { u32 cnt = 0; u16 all_iftypes = 0; - c = &wiphy->iface_combinations[i]; + c = &iface_comb[i]; /* * Combinations with just one interface aren't real, @@ -625,9 +629,13 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) if (WARN_ON(!c->num_different_channels)) return -EINVAL; - /* DFS only works on one channel. */ - if (WARN_ON(c->radar_detect_widths && - (c->num_different_channels > 1))) + /* DFS only works on one channel. Avoid this check + * for the multi-radio global combination, since it holds + * the capabilities of all radio combinations. + */ + if (!combined_radio && + WARN_ON(c->radar_detect_widths && + c->num_different_channels > 1)) return -EINVAL; if (WARN_ON(!c->n_limits)) @@ -648,13 +656,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) if (WARN_ON(wiphy->software_iftypes & types)) return -EINVAL; - /* Only a single P2P_DEVICE can be allowed */ - if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) && + /* Only a single P2P_DEVICE can be allowed, avoid this + * check for the multi-radio global combination, since it + * holds the capabilities of all radio combinations. + */ + if (!combined_radio && + WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) && c->limits[j].max > 1)) return -EINVAL; - /* Only a single NAN can be allowed */ - if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) && + /* Only a single NAN can be allowed, avoid this + * check for the multi-radio global combination, since it + * holds the capabilities of all radio combinations. 
+ */ + if (!combined_radio && + WARN_ON(types & BIT(NL80211_IFTYPE_NAN) && c->limits[j].max > 1)) return -EINVAL; @@ -693,6 +709,34 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) return 0; } +static int wiphy_verify_combinations(struct wiphy *wiphy) +{ + int i, ret; + bool combined_radio = false; + + if (wiphy->n_radio) { + for (i = 0; i < wiphy->n_radio; i++) { + const struct wiphy_radio *radio = &wiphy->radio[i]; + + ret = wiphy_verify_iface_combinations(wiphy, + radio->iface_combinations, + radio->n_iface_combinations, + false); + if (ret) + return ret; + } + + combined_radio = true; + } + + ret = wiphy_verify_iface_combinations(wiphy, + wiphy->iface_combinations, + wiphy->n_iface_combinations, + combined_radio); + + return ret; +} + int wiphy_register(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -1387,6 +1431,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; + wdev->radio_mask = BIT(wdev->wiphy->n_radio) - 1; + if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) diff --git a/net/wireless/core.h b/net/wireless/core.h index 3b3e3cd7027a..4c45f994a8c0 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -516,6 +516,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start) } int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef); int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c deleted file mode 100644 index 64c447040786..000000000000 --- a/net/wireless/lib80211.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 -- common bits for IEEE802.11 drivers - * - * Copyright(c) 2008 John W. Linville <linville@tuxdriver.com> - * - * Portions copied from old ieee80211 component, w/ original copyright - * notices below: - * - * Host AP crypto routines - * - * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi> - * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com> - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/ctype.h> -#include <linux/ieee80211.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/string.h> - -#include <net/lib80211.h> - -#define DRV_DESCRIPTION "common routines for IEEE802.11 drivers" - -MODULE_DESCRIPTION(DRV_DESCRIPTION); -MODULE_AUTHOR("John W. 
Linville <linville@tuxdriver.com>"); -MODULE_LICENSE("GPL"); - -struct lib80211_crypto_alg { - struct list_head list; - const struct lib80211_crypto_ops *ops; -}; - -static LIST_HEAD(lib80211_crypto_algs); -static DEFINE_SPINLOCK(lib80211_crypto_lock); - -static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, - int force); -static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); -static void lib80211_crypt_deinit_handler(struct timer_list *t); - -int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, - spinlock_t *lock) -{ - memset(info, 0, sizeof(*info)); - - info->name = name; - info->lock = lock; - - INIT_LIST_HEAD(&info->crypt_deinit_list); - timer_setup(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, - 0); - - return 0; -} -EXPORT_SYMBOL(lib80211_crypt_info_init); - -void lib80211_crypt_info_free(struct lib80211_crypt_info *info) -{ - int i; - - lib80211_crypt_quiescing(info); - del_timer_sync(&info->crypt_deinit_timer); - lib80211_crypt_deinit_entries(info, 1); - - for (i = 0; i < NUM_WEP_KEYS; i++) { - struct lib80211_crypt_data *crypt = info->crypt[i]; - if (crypt) { - if (crypt->ops) { - crypt->ops->deinit(crypt->priv); - module_put(crypt->ops->owner); - } - kfree(crypt); - info->crypt[i] = NULL; - } - } -} -EXPORT_SYMBOL(lib80211_crypt_info_free); - -static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, - int force) -{ - struct lib80211_crypt_data *entry, *next; - unsigned long flags; - - spin_lock_irqsave(info->lock, flags); - list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) { - if (atomic_read(&entry->refcnt) != 0 && !force) - continue; - - list_del(&entry->list); - - if (entry->ops) { - entry->ops->deinit(entry->priv); - module_put(entry->ops->owner); - } - kfree(entry); - } - spin_unlock_irqrestore(info->lock, flags); -} - -/* After this, crypt_deinit_list won't accept new members */ -static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) -{ - unsigned long flags; - - spin_lock_irqsave(info->lock, flags); - info->crypt_quiesced = 1; - spin_unlock_irqrestore(info->lock, flags); -} - -static void lib80211_crypt_deinit_handler(struct timer_list *t) -{ - struct lib80211_crypt_info *info = from_timer(info, t, - crypt_deinit_timer); - unsigned long flags; - - lib80211_crypt_deinit_entries(info, 0); - - spin_lock_irqsave(info->lock, flags); - if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) { - printk(KERN_DEBUG "%s: entries remaining in delayed crypt " - "deletion list\n", info->name); - info->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&info->crypt_deinit_timer); - } - spin_unlock_irqrestore(info->lock, flags); -} - -void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, - struct lib80211_crypt_data **crypt) -{ - struct lib80211_crypt_data *tmp; - unsigned long flags; - - if (*crypt == NULL) - return; - - tmp = *crypt; - *crypt = NULL; - - /* must not run ops->deinit() while there may be pending encrypt or - * decrypt operations. Use a list of delayed deinits to avoid needing - * locking. 
*/ - - spin_lock_irqsave(info->lock, flags); - if (!info->crypt_quiesced) { - list_add(&tmp->list, &info->crypt_deinit_list); - if (!timer_pending(&info->crypt_deinit_timer)) { - info->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&info->crypt_deinit_timer); - } - } - spin_unlock_irqrestore(info->lock, flags); -} -EXPORT_SYMBOL(lib80211_crypt_delayed_deinit); - -int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops) -{ - unsigned long flags; - struct lib80211_crypto_alg *alg; - - alg = kzalloc(sizeof(*alg), GFP_KERNEL); - if (alg == NULL) - return -ENOMEM; - - alg->ops = ops; - - spin_lock_irqsave(&lib80211_crypto_lock, flags); - list_add(&alg->list, &lib80211_crypto_algs); - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - - printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n", - ops->name); - - return 0; -} -EXPORT_SYMBOL(lib80211_register_crypto_ops); - -int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops) -{ - struct lib80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&lib80211_crypto_lock, flags); - list_for_each_entry(alg, &lib80211_crypto_algs, list) { - if (alg->ops == ops) - goto found; - } - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - return -EINVAL; - - found: - printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n", - ops->name); - list_del(&alg->list); - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - kfree(alg); - return 0; -} -EXPORT_SYMBOL(lib80211_unregister_crypto_ops); - -const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name) -{ - struct lib80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&lib80211_crypto_lock, flags); - list_for_each_entry(alg, &lib80211_crypto_algs, list) { - if (strcmp(alg->ops->name, name) == 0) - goto found; - } - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - return NULL; - - found: - spin_unlock_irqrestore(&lib80211_crypto_lock, flags); - return alg->ops; -} -EXPORT_SYMBOL(lib80211_get_crypto_ops); - -static void *lib80211_crypt_null_init(int keyidx) -{ - return (void *)1; -} - -static void lib80211_crypt_null_deinit(void *priv) -{ -} - -static const struct lib80211_crypto_ops lib80211_crypt_null = { - .name = "NULL", - .init = lib80211_crypt_null_init, - .deinit = lib80211_crypt_null_deinit, - .owner = THIS_MODULE, -}; - -static int __init lib80211_init(void) -{ - pr_info(DRV_DESCRIPTION "\n"); - return lib80211_register_crypto_ops(&lib80211_crypt_null); -} - -static void __exit lib80211_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_null); - BUG_ON(!list_empty(&lib80211_crypto_algs)); -} - -module_init(lib80211_init); -module_exit(lib80211_exit); diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c deleted file mode 100644 index 5aad139130e1..000000000000 --- a/net/wireless/lib80211_crypt_ccmp.c +++ /dev/null @@ -1,448 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 crypt: host-based CCMP encryption implementation for lib80211 - * - * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi> - * Copyright (c) 2008, John W. 
Linville <linville@tuxdriver.com> - */ - -#include <linux/kernel.h> -#include <linux/err.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/if_ether.h> -#include <linux/if_arp.h> -#include <asm/string.h> -#include <linux/wireless.h> - -#include <linux/ieee80211.h> - -#include <linux/crypto.h> -#include <crypto/aead.h> - -#include <net/lib80211.h> - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: CCMP"); -MODULE_LICENSE("GPL"); - -#define AES_BLOCK_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 - -struct lib80211_ccmp_data { - u8 key[CCMP_TK_LEN]; - int key_set; - - u8 tx_pn[CCMP_PN_LEN]; - u8 rx_pn[CCMP_PN_LEN]; - - u32 dot11RSNAStatsCCMPFormatErrors; - u32 dot11RSNAStatsCCMPReplays; - u32 dot11RSNAStatsCCMPDecryptErrors; - - int key_idx; - - struct crypto_aead *tfm; - - /* scratch buffers for virt_to_page() (crypto API) */ - u8 tx_aad[2 * AES_BLOCK_LEN]; - u8 rx_aad[2 * AES_BLOCK_LEN]; -}; - -static void *lib80211_ccmp_init(int key_idx) -{ - struct lib80211_ccmp_data *priv; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - goto fail; - priv->key_idx = key_idx; - - priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->tfm)) { - priv->tfm = NULL; - goto fail; - } - - return priv; - - fail: - if (priv) { - if (priv->tfm) - crypto_free_aead(priv->tfm); - kfree(priv); - } - - return NULL; -} - -static void lib80211_ccmp_deinit(void *priv) -{ - struct lib80211_ccmp_data *_priv = priv; - if (_priv && _priv->tfm) - crypto_free_aead(_priv->tfm); - kfree(priv); -} - -static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr, - const u8 *pn, u8 *iv, u8 *aad) -{ - u8 *pos, qc = 0; - size_t aad_len; - int a4_included, qc_included; - - a4_included = ieee80211_has_a4(hdr->frame_control); - qc_included = ieee80211_is_data_qos(hdr->frame_control); - - aad_len = 22; - if (a4_included) - aad_len += 6; - if (qc_included) { - pos = (u8 *) & hdr->addr4; - if (a4_included) - pos += 6; - qc = *pos & 0x0f; - aad_len += 2; - } - - /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC - * mode authentication are not allowed to collide, yet both are derived - * from the same vector. We only set L := 1 here to indicate that the - * data size can be represented in (L+1) bytes. The CCM layer will take - * care of storing the data length in the top (L+1) bytes and setting - * and clearing the other bits as is required to derive the two IVs. - */ - iv[0] = 0x1; - - /* Nonce: QC | A2 | PN */ - iv[1] = qc; - memcpy(iv + 2, hdr->addr2, ETH_ALEN); - memcpy(iv + 8, pn, CCMP_PN_LEN); - - /* AAD: - * FC with bits 4..6 and 11..13 masked to zero; 14 is always one - * A1 | A2 | A3 - * SC with bits 4..15 (seq#) masked to zero - * A4 (if present) - * QC (if present) - */ - pos = (u8 *) hdr; - aad[0] = pos[0] & 0x8f; - aad[1] = pos[1] & 0xc7; - memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); - pos = (u8 *) & hdr->seq_ctrl; - aad[20] = pos[0] & 0x0f; - aad[21] = 0; /* all bits masked */ - memset(aad + 22, 0, 8); - if (a4_included) - memcpy(aad + 22, hdr->addr4, ETH_ALEN); - if (qc_included) { - aad[a4_included ? 
28 : 22] = qc; - /* rest of QC masked */ - } - return aad_len; -} - -static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, - u8 *aeskey, int keylen, void *priv) -{ - struct lib80211_ccmp_data *key = priv; - int i; - u8 *pos; - - if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len) - return -1; - - if (aeskey != NULL && keylen >= CCMP_TK_LEN) - memcpy(aeskey, key->key, CCMP_TK_LEN); - - pos = skb_push(skb, CCMP_HDR_LEN); - memmove(pos, pos + CCMP_HDR_LEN, hdr_len); - pos += hdr_len; - - i = CCMP_PN_LEN - 1; - while (i >= 0) { - key->tx_pn[i]++; - if (key->tx_pn[i] != 0) - break; - i--; - } - - *pos++ = key->tx_pn[5]; - *pos++ = key->tx_pn[4]; - *pos++ = 0; - *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ; - *pos++ = key->tx_pn[3]; - *pos++ = key->tx_pn[2]; - *pos++ = key->tx_pn[1]; - *pos++ = key->tx_pn[0]; - - return CCMP_HDR_LEN; -} - -static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_ccmp_data *key = priv; - struct ieee80211_hdr *hdr; - struct aead_request *req; - struct scatterlist sg[2]; - u8 *aad = key->tx_aad; - u8 iv[AES_BLOCK_LEN]; - int len, data_len, aad_len; - int ret; - - if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) - return -1; - - data_len = skb->len - hdr_len; - len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); - if (len < 0) - return -1; - - req = aead_request_alloc(key->tfm, GFP_ATOMIC); - if (!req) - return -ENOMEM; - - hdr = (struct ieee80211_hdr *)skb->data; - aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad); - - skb_put(skb, CCMP_MIC_LEN); - - sg_init_table(sg, 2); - sg_set_buf(&sg[0], aad, aad_len); - sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN, - data_len + CCMP_MIC_LEN); - - aead_request_set_callback(req, 0, NULL, NULL); - aead_request_set_ad(req, aad_len); - aead_request_set_crypt(req, sg, sg, data_len, iv); - - ret = crypto_aead_encrypt(req); - aead_request_free(req); - - return ret; -} - -/* - * deal with seq counter wrapping correctly. 
- * refer to timer_after() for jiffies wrapping handling - */ -static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) -{ - u32 iv32_n, iv16_n; - u32 iv32_o, iv16_o; - - iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3]; - iv16_n = (pn_n[4] << 8) | pn_n[5]; - - iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3]; - iv16_o = (pn_o[4] << 8) | pn_o[5]; - - if ((s32)iv32_n - (s32)iv32_o < 0 || - (iv32_n == iv32_o && iv16_n <= iv16_o)) - return 1; - return 0; -} - -static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_ccmp_data *key = priv; - u8 keyidx, *pos; - struct ieee80211_hdr *hdr; - struct aead_request *req; - struct scatterlist sg[2]; - u8 *aad = key->rx_aad; - u8 iv[AES_BLOCK_LEN]; - u8 pn[6]; - int aad_len, ret; - size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN; - - if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { - key->dot11RSNAStatsCCMPFormatErrors++; - return -1; - } - - hdr = (struct ieee80211_hdr *)skb->data; - pos = skb->data + hdr_len; - keyidx = pos[3]; - if (!(keyidx & (1 << 5))) { - net_dbg_ratelimited("CCMP: received packet without ExtIV flag from %pM\n", - hdr->addr2); - key->dot11RSNAStatsCCMPFormatErrors++; - return -2; - } - keyidx >>= 6; - if (key->key_idx != keyidx) { - net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n", - key->key_idx, keyidx); - return -6; - } - if (!key->key_set) { - net_dbg_ratelimited("CCMP: received packet from %pM with keyid=%d that does not have a configured key\n", - hdr->addr2, keyidx); - return -3; - } - - pn[0] = pos[7]; - pn[1] = pos[6]; - pn[2] = pos[5]; - pn[3] = pos[4]; - pn[4] = pos[1]; - pn[5] = pos[0]; - pos += 8; - - if (ccmp_replay_check(pn, key->rx_pn)) { -#ifdef CONFIG_LIB80211_DEBUG - net_dbg_ratelimited("CCMP: replay detected: STA=%pM previous PN %02x%02x%02x%02x%02x%02x received PN %02x%02x%02x%02x%02x%02x\n", - hdr->addr2, - key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], - key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); -#endif - key->dot11RSNAStatsCCMPReplays++; - return -4; - } - - req = aead_request_alloc(key->tfm, GFP_ATOMIC); - if (!req) - return -ENOMEM; - - aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad); - - sg_init_table(sg, 2); - sg_set_buf(&sg[0], aad, aad_len); - sg_set_buf(&sg[1], pos, data_len); - - aead_request_set_callback(req, 0, NULL, NULL); - aead_request_set_ad(req, aad_len); - aead_request_set_crypt(req, sg, sg, data_len, iv); - - ret = crypto_aead_decrypt(req); - aead_request_free(req); - - if (ret) { - net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n", - hdr->addr2, ret); - key->dot11RSNAStatsCCMPDecryptErrors++; - return -5; - } - - memcpy(key->rx_pn, pn, CCMP_PN_LEN); - - /* Remove hdr and MIC */ - memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len); - skb_pull(skb, CCMP_HDR_LEN); - skb_trim(skb, skb->len - CCMP_MIC_LEN); - - return keyidx; -} - -static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_ccmp_data *data = priv; - int keyidx; - struct crypto_aead *tfm = data->tfm; - - keyidx = data->key_idx; - memset(data, 0, sizeof(*data)); - data->key_idx = keyidx; - data->tfm = tfm; - if (len == CCMP_TK_LEN) { - memcpy(data->key, key, CCMP_TK_LEN); - data->key_set = 1; - if (seq) { - data->rx_pn[0] = seq[5]; - data->rx_pn[1] = seq[4]; - data->rx_pn[2] = seq[3]; - data->rx_pn[3] = seq[2]; - data->rx_pn[4] = seq[1]; - data->rx_pn[5] = seq[0]; - } - if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) || 
- crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN)) - return -1; - } else if (len == 0) - data->key_set = 0; - else - return -1; - - return 0; -} - -static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_ccmp_data *data = priv; - - if (len < CCMP_TK_LEN) - return -1; - - if (!data->key_set) - return 0; - memcpy(key, data->key, CCMP_TK_LEN); - - if (seq) { - seq[0] = data->tx_pn[5]; - seq[1] = data->tx_pn[4]; - seq[2] = data->tx_pn[3]; - seq[3] = data->tx_pn[2]; - seq[4] = data->tx_pn[1]; - seq[5] = data->tx_pn[0]; - } - - return CCMP_TK_LEN; -} - -static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv) -{ - struct lib80211_ccmp_data *ccmp = priv; - - seq_printf(m, - "key[%d] alg=CCMP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "format_errors=%d replays=%d decrypt_errors=%d\n", - ccmp->key_idx, ccmp->key_set, - ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], - ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], - ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], - ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], - ccmp->dot11RSNAStatsCCMPFormatErrors, - ccmp->dot11RSNAStatsCCMPReplays, - ccmp->dot11RSNAStatsCCMPDecryptErrors); -} - -static const struct lib80211_crypto_ops lib80211_crypt_ccmp = { - .name = "CCMP", - .init = lib80211_ccmp_init, - .deinit = lib80211_ccmp_deinit, - .encrypt_mpdu = lib80211_ccmp_encrypt, - .decrypt_mpdu = lib80211_ccmp_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = lib80211_ccmp_set_key, - .get_key = lib80211_ccmp_get_key, - .print_stats = lib80211_ccmp_print_stats, - .extra_mpdu_prefix_len = CCMP_HDR_LEN, - .extra_mpdu_postfix_len = CCMP_MIC_LEN, - .owner = THIS_MODULE, -}; - -static int __init lib80211_crypto_ccmp_init(void) -{ - return lib80211_register_crypto_ops(&lib80211_crypt_ccmp); -} - -static void __exit lib80211_crypto_ccmp_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp); -} - -module_init(lib80211_crypto_ccmp_init); -module_exit(lib80211_crypto_ccmp_exit); diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c deleted file mode 100644 index 63e68e5e121e..000000000000 --- a/net/wireless/lib80211_crypt_tkip.c +++ /dev/null @@ -1,738 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 crypt: host-based TKIP encryption implementation for lib80211 - * - * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi> - * Copyright (c) 2008, John W. 
Linville <linville@tuxdriver.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/err.h> -#include <linux/fips.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/scatterlist.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/mm.h> -#include <linux/if_ether.h> -#include <linux/if_arp.h> -#include <asm/string.h> - -#include <linux/wireless.h> -#include <linux/ieee80211.h> -#include <net/iw_handler.h> - -#include <crypto/arc4.h> -#include <crypto/hash.h> -#include <linux/crypto.h> -#include <linux/crc32.h> - -#include <net/lib80211.h> - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("lib80211 crypt: TKIP"); -MODULE_LICENSE("GPL"); - -#define TKIP_HDR_LEN 8 - -struct lib80211_tkip_data { -#define TKIP_KEY_LEN 32 - u8 key[TKIP_KEY_LEN]; - int key_set; - - u32 tx_iv32; - u16 tx_iv16; - u16 tx_ttak[5]; - int tx_phase1_done; - - u32 rx_iv32; - u16 rx_iv16; - u16 rx_ttak[5]; - int rx_phase1_done; - u32 rx_iv32_new; - u16 rx_iv16_new; - - u32 dot11RSNAStatsTKIPReplays; - u32 dot11RSNAStatsTKIPICVErrors; - u32 dot11RSNAStatsTKIPLocalMICFailures; - - int key_idx; - - struct arc4_ctx rx_ctx_arc4; - struct arc4_ctx tx_ctx_arc4; - struct crypto_shash *rx_tfm_michael; - struct crypto_shash *tx_tfm_michael; - - /* scratch buffers for virt_to_page() (crypto API) */ - u8 rx_hdr[16], tx_hdr[16]; - - unsigned long flags; -}; - -static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv) -{ - struct lib80211_tkip_data *_priv = priv; - unsigned long old_flags = _priv->flags; - _priv->flags = flags; - return old_flags; -} - -static unsigned long lib80211_tkip_get_flags(void *priv) -{ - struct lib80211_tkip_data *_priv = priv; - return _priv->flags; -} - -static void *lib80211_tkip_init(int key_idx) -{ - struct lib80211_tkip_data *priv; - - if (fips_enabled) - return NULL; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - goto fail; - - priv->key_idx = key_idx; - - priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(priv->tx_tfm_michael)) { - priv->tx_tfm_michael = NULL; - goto fail; - } - - priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(priv->rx_tfm_michael)) { - priv->rx_tfm_michael = NULL; - goto fail; - } - - return priv; - - fail: - if (priv) { - crypto_free_shash(priv->tx_tfm_michael); - crypto_free_shash(priv->rx_tfm_michael); - kfree(priv); - } - - return NULL; -} - -static void lib80211_tkip_deinit(void *priv) -{ - struct lib80211_tkip_data *_priv = priv; - if (_priv) { - crypto_free_shash(_priv->tx_tfm_michael); - crypto_free_shash(_priv->rx_tfm_michael); - } - kfree_sensitive(priv); -} - -static inline u16 RotR1(u16 val) -{ - return (val >> 1) | (val << 15); -} - -static inline u8 Lo8(u16 val) -{ - return val & 0xff; -} - -static inline u8 Hi8(u16 val) -{ - return val >> 8; -} - -static inline u16 Lo16(u32 val) -{ - return val & 0xffff; -} - -static inline u16 Hi16(u32 val) -{ - return val >> 16; -} - -static inline u16 Mk16(u8 hi, u8 lo) -{ - return lo | (((u16) hi) << 8); -} - -static inline u16 Mk16_le(__le16 * v) -{ - return le16_to_cpu(*v); -} - -static const u16 Sbox[256] = { - 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, - 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, - 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, - 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B, - 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 
0x7E41, 0xF502, 0x834F, - 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F, - 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5, - 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F, - 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB, - 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397, - 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED, - 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A, - 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194, - 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3, - 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104, - 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D, - 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39, - 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695, - 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83, - 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76, - 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4, - 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B, - 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0, - 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018, - 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751, - 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85, - 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12, - 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9, - 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7, - 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A, - 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8, - 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, -}; - -static inline u16 _S_(u16 v) -{ - u16 t = Sbox[Hi8(v)]; - return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8)); -} - -#define PHASE1_LOOP_COUNT 8 - -static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA, - u32 IV32) -{ - int i, j; - - /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */ - TTAK[0] = Lo16(IV32); - TTAK[1] = Hi16(IV32); - TTAK[2] = Mk16(TA[1], TA[0]); - TTAK[3] = Mk16(TA[3], TA[2]); - TTAK[4] = Mk16(TA[5], TA[4]); - - for (i = 0; i < PHASE1_LOOP_COUNT; i++) { - j = 2 * (i & 1); - TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j])); - TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j])); - TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j])); - TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j])); - TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i; - } -} - -static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, - u16 IV16) -{ - /* Make temporary area overlap WEP seed so that the final copy can be - * avoided on little endian hosts. 
*/ - u16 *PPK = (u16 *) & WEPSeed[4]; - - /* Step 1 - make copy of TTAK and bring in TSC */ - PPK[0] = TTAK[0]; - PPK[1] = TTAK[1]; - PPK[2] = TTAK[2]; - PPK[3] = TTAK[3]; - PPK[4] = TTAK[4]; - PPK[5] = TTAK[4] + IV16; - - /* Step 2 - 96-bit bijective mixing using S-box */ - PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0])); - PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2])); - PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4])); - PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6])); - PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8])); - PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10])); - - PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12])); - PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14])); - PPK[2] += RotR1(PPK[1]); - PPK[3] += RotR1(PPK[2]); - PPK[4] += RotR1(PPK[3]); - PPK[5] += RotR1(PPK[4]); - - /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value - * WEPSeed[0..2] is transmitted as WEP IV */ - WEPSeed[0] = Hi8(IV16); - WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F; - WEPSeed[2] = Lo8(IV16); - WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1); - -#ifdef __BIG_ENDIAN - { - int i; - for (i = 0; i < 6; i++) - PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8); - } -#endif -} - -static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, - u8 * rc4key, int keylen, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - u8 *pos; - struct ieee80211_hdr *hdr; - - hdr = (struct ieee80211_hdr *)skb->data; - - if (skb_headroom(skb) < TKIP_HDR_LEN || skb->len < hdr_len) - return -1; - - if (rc4key == NULL || keylen < 16) - return -1; - - if (!tkey->tx_phase1_done) { - tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, - tkey->tx_iv32); - tkey->tx_phase1_done = 1; - } - tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); - - pos = skb_push(skb, TKIP_HDR_LEN); - memmove(pos, pos + TKIP_HDR_LEN, hdr_len); - pos += hdr_len; - - *pos++ = *rc4key; - *pos++ = *(rc4key + 1); - *pos++ = *(rc4key + 2); - *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ; - *pos++ = tkey->tx_iv32 & 0xff; - *pos++ = (tkey->tx_iv32 >> 8) & 0xff; - *pos++ = (tkey->tx_iv32 >> 16) & 0xff; - *pos++ = (tkey->tx_iv32 >> 24) & 0xff; - - tkey->tx_iv16++; - if (tkey->tx_iv16 == 0) { - tkey->tx_phase1_done = 0; - tkey->tx_iv32++; - } - - return TKIP_HDR_LEN; -} - -static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - int len; - u8 rc4key[16], *pos, *icv; - u32 crc; - - if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - net_dbg_ratelimited("TKIP countermeasures: dropped TX packet to %pM\n", - hdr->addr1); - return -1; - } - - if (skb_tailroom(skb) < 4 || skb->len < hdr_len) - return -1; - - len = skb->len - hdr_len; - pos = skb->data + hdr_len; - - if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) - return -1; - - crc = ~crc32_le(~0, pos, len); - icv = skb_put(skb, 4); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - - arc4_setkey(&tkey->tx_ctx_arc4, rc4key, 16); - arc4_crypt(&tkey->tx_ctx_arc4, pos, pos, len + 4); - - return 0; -} - -/* - * deal with seq counter wrapping correctly. 
- * refer to time_after() for jiffies wrapping handling
- */
-static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
-				    u32 iv32_o, u16 iv16_o)
-{
-	if ((s32)iv32_n - (s32)iv32_o < 0 ||
-	    (iv32_n == iv32_o && iv16_n <= iv16_o))
-		return 1;
-	return 0;
-}
-
-static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
-	struct lib80211_tkip_data *tkey = priv;
-	u8 rc4key[16];
-	u8 keyidx, *pos;
-	u32 iv32;
-	u16 iv16;
-	struct ieee80211_hdr *hdr;
-	u8 icv[4];
-	u32 crc;
-	int plen;
-
-	hdr = (struct ieee80211_hdr *)skb->data;
-
-	if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
-		net_dbg_ratelimited("TKIP countermeasures: dropped received packet from %pM\n",
-				    hdr->addr2);
-		return -1;
-	}
-
-	if (skb->len < hdr_len + TKIP_HDR_LEN + 4)
-		return -1;
-
-	pos = skb->data + hdr_len;
-	keyidx = pos[3];
-	if (!(keyidx & (1 << 5))) {
-		net_dbg_ratelimited("TKIP: received packet without ExtIV flag from %pM\n",
-				    hdr->addr2);
-		return -2;
-	}
-	keyidx >>= 6;
-	if (tkey->key_idx != keyidx) {
-		net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n",
-				    tkey->key_idx, keyidx);
-		return -6;
-	}
-	if (!tkey->key_set) {
-		net_dbg_ratelimited("TKIP: received packet from %pM with keyid=%d that does not have a configured key\n",
-				    hdr->addr2, keyidx);
-		return -3;
-	}
-	iv16 = (pos[0] << 8) | pos[2];
-	iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
-	pos += TKIP_HDR_LEN;
-
-	if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
-#ifdef CONFIG_LIB80211_DEBUG
-		net_dbg_ratelimited("TKIP: replay detected: STA=%pM previous TSC %08x%04x received TSC %08x%04x\n",
-				    hdr->addr2, tkey->rx_iv32, tkey->rx_iv16,
-				    iv32, iv16);
-#endif
-		tkey->dot11RSNAStatsTKIPReplays++;
-		return -4;
-	}
-
-	if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) {
-		tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32);
-		tkey->rx_phase1_done = 1;
-	}
-	tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16);
-
-	plen = skb->len - hdr_len - 12;
-
-	arc4_setkey(&tkey->rx_ctx_arc4, rc4key, 16);
-	arc4_crypt(&tkey->rx_ctx_arc4, pos, pos, plen + 4);
-
-	crc = ~crc32_le(~0, pos, plen);
-	icv[0] = crc;
-	icv[1] = crc >> 8;
-	icv[2] = crc >> 16;
-	icv[3] = crc >> 24;
-	if (memcmp(icv, pos + plen, 4) != 0) {
-		if (iv32 != tkey->rx_iv32) {
-			/* Previously cached Phase1 result was already lost, so
-			 * it needs to be recalculated for the next packet.
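Editor's note: tkip_replay_check() above compares a wrapping counter the way the kernel's time_after() compares jiffies: it looks at the sign of the signed difference rather than the raw magnitudes, so a counter that has wrapped past zero is still seen as newer. A self-contained illustration of the same check:

#include <stdint.h>
#include <stdio.h>

/* Same shape as tkip_replay_check(): returns 1 when (iv32_n, iv16_n)
 * is not strictly newer than (iv32_o, iv16_o). */
static int replay_check(uint32_t iv32_n, uint16_t iv16_n,
			uint32_t iv32_o, uint16_t iv16_o)
{
	if ((int32_t)iv32_n - (int32_t)iv32_o < 0 ||
	    (iv32_n == iv32_o && iv16_n <= iv16_o))
		return 1;
	return 0;
}

int main(void)
{
	/* Plain case: 0x10 is older than 0x20, so it is a replay. */
	printf("%d\n", replay_check(0x10, 0, 0x20, 0));             /* 1 */
	/* Wrap case: 1 follows 0xfffffffe; the signed difference is a
	 * small positive number, so the frame is accepted. */
	printf("%d\n", replay_check(0x00000001, 0, 0xfffffffe, 0)); /* 0 */
	return 0;
}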
*/ - tkey->rx_phase1_done = 0; - } -#ifdef CONFIG_LIB80211_DEBUG - net_dbg_ratelimited("TKIP: ICV error detected: STA=%pM\n", - hdr->addr2); -#endif - tkey->dot11RSNAStatsTKIPICVErrors++; - return -5; - } - - /* Update real counters only after Michael MIC verification has - * completed */ - tkey->rx_iv32_new = iv32; - tkey->rx_iv16_new = iv16; - - /* Remove IV and ICV */ - memmove(skb->data + TKIP_HDR_LEN, skb->data, hdr_len); - skb_pull(skb, TKIP_HDR_LEN); - skb_trim(skb, skb->len - 4); - - return keyidx; -} - -static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, - u8 *data, size_t data_len, u8 *mic) -{ - SHASH_DESC_ON_STACK(desc, tfm_michael); - int err; - - if (tfm_michael == NULL) { - pr_warn("%s(): tfm_michael == NULL\n", __func__); - return -1; - } - - desc->tfm = tfm_michael; - - if (crypto_shash_setkey(tfm_michael, key, 8)) - return -1; - - err = crypto_shash_init(desc); - if (err) - goto out; - err = crypto_shash_update(desc, hdr, 16); - if (err) - goto out; - err = crypto_shash_update(desc, data, data_len); - if (err) - goto out; - err = crypto_shash_final(desc, mic); - -out: - shash_desc_zero(desc); - return err; -} - -static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) -{ - struct ieee80211_hdr *hdr11; - - hdr11 = (struct ieee80211_hdr *)skb->data; - - switch (le16_to_cpu(hdr11->frame_control) & - (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - case IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ - break; - default: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - } - - if (ieee80211_is_data_qos(hdr11->frame_control)) { - hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11))) - & IEEE80211_QOS_CTL_TID_MASK; - } else - hdr[12] = 0; /* priority */ - - hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ -} - -static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len, - void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - u8 *pos; - - if (skb_tailroom(skb) < 8 || skb->len < hdr_len) { - printk(KERN_DEBUG "Invalid packet for Michael MIC add " - "(tailroom=%d hdr_len=%d skb->len=%d)\n", - skb_tailroom(skb), hdr_len, skb->len); - return -1; - } - - michael_mic_hdr(skb, tkey->tx_hdr); - pos = skb_put(skb, 8); - if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) - return -1; - - return 0; -} - -static void lib80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, - int keyidx) -{ - union iwreq_data wrqu; - struct iw_michaelmicfailure ev; - - /* TODO: needed parameters: count, keyid, key type, TSC */ - memset(&ev, 0, sizeof(ev)); - ev.flags = keyidx & IW_MICFAILURE_KEY_ID; - if (hdr->addr1[0] & 0x01) - ev.flags |= IW_MICFAILURE_GROUP; - else - ev.flags |= IW_MICFAILURE_PAIRWISE; - ev.src_addr.sa_family = ARPHRD_ETHER; - memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = sizeof(ev); - wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); -} - -static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx, - int 
hdr_len, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - u8 mic[8]; - - if (!tkey->key_set) - return -1; - - michael_mic_hdr(skb, tkey->rx_hdr); - if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) - return -1; - if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { - struct ieee80211_hdr *hdr; - hdr = (struct ieee80211_hdr *)skb->data; - printk(KERN_DEBUG "%s: Michael MIC verification failed for " - "MSDU from %pM keyidx=%d\n", - skb->dev ? skb->dev->name : "N/A", hdr->addr2, - keyidx); - if (skb->dev) - lib80211_michael_mic_failure(skb->dev, hdr, keyidx); - tkey->dot11RSNAStatsTKIPLocalMICFailures++; - return -1; - } - - /* Update TSC counters for RX now that the packet verification has - * completed. */ - tkey->rx_iv32 = tkey->rx_iv32_new; - tkey->rx_iv16 = tkey->rx_iv16_new; - - skb_trim(skb, skb->len - 8); - - return 0; -} - -static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - int keyidx; - struct crypto_shash *tfm = tkey->tx_tfm_michael; - struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4; - struct crypto_shash *tfm3 = tkey->rx_tfm_michael; - struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4; - - keyidx = tkey->key_idx; - memset(tkey, 0, sizeof(*tkey)); - tkey->key_idx = keyidx; - tkey->tx_tfm_michael = tfm; - tkey->tx_ctx_arc4 = *tfm2; - tkey->rx_tfm_michael = tfm3; - tkey->rx_ctx_arc4 = *tfm4; - if (len == TKIP_KEY_LEN) { - memcpy(tkey->key, key, TKIP_KEY_LEN); - tkey->key_set = 1; - tkey->tx_iv16 = 1; /* TSC is initialized to 1 */ - if (seq) { - tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) | - (seq[3] << 8) | seq[2]; - tkey->rx_iv16 = (seq[1] << 8) | seq[0]; - } - } else if (len == 0) - tkey->key_set = 0; - else - return -1; - - return 0; -} - -static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_tkip_data *tkey = priv; - - if (len < TKIP_KEY_LEN) - return -1; - - if (!tkey->key_set) - return 0; - memcpy(key, tkey->key, TKIP_KEY_LEN); - - if (seq) { - /* - * Not clear if this should return the value as is - * or - as the code previously seemed to partially - * have been written as - subtract one from it. It - * was working this way for a long time so leave it. 
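Editor's note: whatever the off-by-one question raised in the comment above, the encoding itself is straightforward: the 48-bit TKIP TSC is returned as six little-endian bytes, the low 16 bits from tx_iv16 and the high 32 bits from tx_iv32, exactly as the seq[] assignments that follow show. A stand-alone version of that packing:

#include <stdint.h>
#include <stdio.h>

/* Pack a 48-bit TSC (iv32:iv16) into seq[0..5], least significant
 * byte first, mirroring the seq[] stores in lib80211_tkip_get_key(). */
static void pack_tsc(uint8_t seq[6], uint32_t iv32, uint16_t iv16)
{
	seq[0] = (uint8_t)iv16;
	seq[1] = (uint8_t)(iv16 >> 8);
	seq[2] = (uint8_t)iv32;
	seq[3] = (uint8_t)(iv32 >> 8);
	seq[4] = (uint8_t)(iv32 >> 16);
	seq[5] = (uint8_t)(iv32 >> 24);
}

int main(void)
{
	uint8_t seq[6];
	int i;

	pack_tsc(seq, 0x11223344, 0xaabb);
	for (i = 0; i < 6; i++)
		printf("%02x ", seq[i]); /* bb aa 44 33 22 11 */
	printf("\n");
	return 0;
}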
- */ - seq[0] = tkey->tx_iv16; - seq[1] = tkey->tx_iv16 >> 8; - seq[2] = tkey->tx_iv32; - seq[3] = tkey->tx_iv32 >> 8; - seq[4] = tkey->tx_iv32 >> 16; - seq[5] = tkey->tx_iv32 >> 24; - } - - return TKIP_KEY_LEN; -} - -static void lib80211_tkip_print_stats(struct seq_file *m, void *priv) -{ - struct lib80211_tkip_data *tkip = priv; - seq_printf(m, - "key[%d] alg=TKIP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "replays=%d icv_errors=%d local_mic_failures=%d\n", - tkip->key_idx, tkip->key_set, - (tkip->tx_iv32 >> 24) & 0xff, - (tkip->tx_iv32 >> 16) & 0xff, - (tkip->tx_iv32 >> 8) & 0xff, - tkip->tx_iv32 & 0xff, - (tkip->tx_iv16 >> 8) & 0xff, - tkip->tx_iv16 & 0xff, - (tkip->rx_iv32 >> 24) & 0xff, - (tkip->rx_iv32 >> 16) & 0xff, - (tkip->rx_iv32 >> 8) & 0xff, - tkip->rx_iv32 & 0xff, - (tkip->rx_iv16 >> 8) & 0xff, - tkip->rx_iv16 & 0xff, - tkip->dot11RSNAStatsTKIPReplays, - tkip->dot11RSNAStatsTKIPICVErrors, - tkip->dot11RSNAStatsTKIPLocalMICFailures); -} - -static const struct lib80211_crypto_ops lib80211_crypt_tkip = { - .name = "TKIP", - .init = lib80211_tkip_init, - .deinit = lib80211_tkip_deinit, - .encrypt_mpdu = lib80211_tkip_encrypt, - .decrypt_mpdu = lib80211_tkip_decrypt, - .encrypt_msdu = lib80211_michael_mic_add, - .decrypt_msdu = lib80211_michael_mic_verify, - .set_key = lib80211_tkip_set_key, - .get_key = lib80211_tkip_get_key, - .print_stats = lib80211_tkip_print_stats, - .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ - .extra_mpdu_postfix_len = 4, /* ICV */ - .extra_msdu_postfix_len = 8, /* MIC */ - .get_flags = lib80211_tkip_get_flags, - .set_flags = lib80211_tkip_set_flags, - .owner = THIS_MODULE, -}; - -static int __init lib80211_crypto_tkip_init(void) -{ - return lib80211_register_crypto_ops(&lib80211_crypt_tkip); -} - -static void __exit lib80211_crypto_tkip_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_tkip); -} - -module_init(lib80211_crypto_tkip_init); -module_exit(lib80211_crypto_tkip_exit); diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c deleted file mode 100644 index 3b148c7bef85..000000000000 --- a/net/wireless/lib80211_crypt_wep.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * lib80211 crypt: host-based WEP encryption implementation for lib80211 - * - * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi> - * Copyright (c) 2008, John W. 
Linville <linville@tuxdriver.com> - */ - -#include <linux/err.h> -#include <linux/fips.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/scatterlist.h> -#include <linux/skbuff.h> -#include <linux/mm.h> -#include <asm/string.h> - -#include <net/lib80211.h> - -#include <crypto/arc4.h> -#include <linux/crc32.h> - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("lib80211 crypt: WEP"); -MODULE_LICENSE("GPL"); - -struct lib80211_wep_data { - u32 iv; -#define WEP_KEY_LEN 13 - u8 key[WEP_KEY_LEN + 1]; - u8 key_len; - u8 key_idx; - struct arc4_ctx tx_ctx; - struct arc4_ctx rx_ctx; -}; - -static void *lib80211_wep_init(int keyidx) -{ - struct lib80211_wep_data *priv; - - if (fips_enabled) - return NULL; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - return NULL; - priv->key_idx = keyidx; - - /* start WEP IV from a random value */ - get_random_bytes(&priv->iv, 4); - - return priv; -} - -static void lib80211_wep_deinit(void *priv) -{ - kfree_sensitive(priv); -} - -/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ -static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len, - u8 *key, int keylen, void *priv) -{ - struct lib80211_wep_data *wep = priv; - u32 klen; - u8 *pos; - - if (skb_headroom(skb) < 4 || skb->len < hdr_len) - return -1; - - pos = skb_push(skb, 4); - memmove(pos, pos + 4, hdr_len); - pos += hdr_len; - - klen = 3 + wep->key_len; - - wep->iv++; - - /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key - * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N) - * can be used to speedup attacks, so avoid using them. */ - if ((wep->iv & 0xff00) == 0xff00) { - u8 B = (wep->iv >> 16) & 0xff; - if (B >= 3 && B < klen) - wep->iv += 0x0100; - } - - /* Prepend 24-bit IV to RC4 key and TX frame */ - *pos++ = (wep->iv >> 16) & 0xff; - *pos++ = (wep->iv >> 8) & 0xff; - *pos++ = wep->iv & 0xff; - *pos++ = wep->key_idx << 6; - - return 0; -} - -/* Perform WEP encryption on given skb that has at least 4 bytes of headroom - * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, - * so the payload length increases with 8 bytes. - * - * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) - */ -static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_wep_data *wep = priv; - u32 crc, klen, len; - u8 *pos, *icv; - u8 key[WEP_KEY_LEN + 3]; - - /* other checks are in lib80211_wep_build_iv */ - if (skb_tailroom(skb) < 4) - return -1; - - /* add the IV to the frame */ - if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv)) - return -1; - - /* Copy the IV into the first 3 bytes of the key */ - skb_copy_from_linear_data_offset(skb, hdr_len, key, 3); - - /* Copy rest of the WEP key (the secret part) */ - memcpy(key + 3, wep->key, wep->key_len); - - len = skb->len - hdr_len - 4; - pos = skb->data + hdr_len + 4; - klen = 3 + wep->key_len; - - /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */ - crc = ~crc32_le(~0, pos, len); - icv = skb_put(skb, 4); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - - arc4_setkey(&wep->tx_ctx, key, klen); - arc4_crypt(&wep->tx_ctx, pos, pos, len + 4); - - return 0; -} - -/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of - * the frame: IV (4 bytes), encrypted payload (including SNAP header), - * ICV (4 bytes). len includes both IV and ICV. 
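Editor's note: the TKIP and WEP paths in these two files build the ICV the same way: a standard CRC-32 over the plaintext, stored little-endian and then encrypted together with the data. The kernel spells it ~crc32_le(~0, buf, len); in userspace, zlib's crc32() should produce the same value, since its built-in pre- and post-inversion match the explicit ones here (an assumption worth checking against a known vector, as below). A minimal sketch:

#include <stdint.h>
#include <stdio.h>
#include <zlib.h> /* link with -lz */

int main(void)
{
	const unsigned char data[] = "123456789";
	uint32_t crc;
	uint8_t icv[4];

	/* Standard CRC-32; the check value for "123456789" is 0xcbf43926. */
	crc = (uint32_t)crc32(0L, data, sizeof(data) - 1);

	/* Store little-endian, as the icv[0..3] assignments above do. */
	icv[0] = (uint8_t)crc;
	icv[1] = (uint8_t)(crc >> 8);
	icv[2] = (uint8_t)(crc >> 16);
	icv[3] = (uint8_t)(crc >> 24);

	printf("crc=%08x icv on wire: %02x %02x %02x %02x\n",
	       crc, icv[0], icv[1], icv[2], icv[3]);
	return 0;
}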
- * - * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on - * failure. If frame is OK, IV and ICV will be removed. - */ -static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct lib80211_wep_data *wep = priv; - u32 crc, klen, plen; - u8 key[WEP_KEY_LEN + 3]; - u8 keyidx, *pos, icv[4]; - - if (skb->len < hdr_len + 8) - return -1; - - pos = skb->data + hdr_len; - key[0] = *pos++; - key[1] = *pos++; - key[2] = *pos++; - keyidx = *pos++ >> 6; - if (keyidx != wep->key_idx) - return -1; - - klen = 3 + wep->key_len; - - /* Copy rest of the WEP key (the secret part) */ - memcpy(key + 3, wep->key, wep->key_len); - - /* Apply RC4 to data and compute CRC32 over decrypted data */ - plen = skb->len - hdr_len - 8; - - arc4_setkey(&wep->rx_ctx, key, klen); - arc4_crypt(&wep->rx_ctx, pos, pos, plen + 4); - - crc = ~crc32_le(~0, pos, plen); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - if (memcmp(icv, pos + plen, 4) != 0) { - /* ICV mismatch - drop frame */ - return -2; - } - - /* Remove IV and ICV */ - memmove(skb->data + 4, skb->data, hdr_len); - skb_pull(skb, 4); - skb_trim(skb, skb->len - 4); - - return 0; -} - -static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_wep_data *wep = priv; - - if (len < 0 || len > WEP_KEY_LEN) - return -1; - - memcpy(wep->key, key, len); - wep->key_len = len; - - return 0; -} - -static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct lib80211_wep_data *wep = priv; - - if (len < wep->key_len) - return -1; - - memcpy(key, wep->key, wep->key_len); - - return wep->key_len; -} - -static void lib80211_wep_print_stats(struct seq_file *m, void *priv) -{ - struct lib80211_wep_data *wep = priv; - seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); -} - -static const struct lib80211_crypto_ops lib80211_crypt_wep = { - .name = "WEP", - .init = lib80211_wep_init, - .deinit = lib80211_wep_deinit, - .encrypt_mpdu = lib80211_wep_encrypt, - .decrypt_mpdu = lib80211_wep_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = lib80211_wep_set_key, - .get_key = lib80211_wep_get_key, - .print_stats = lib80211_wep_print_stats, - .extra_mpdu_prefix_len = 4, /* IV */ - .extra_mpdu_postfix_len = 4, /* ICV */ - .owner = THIS_MODULE, -}; - -static int __init lib80211_crypto_wep_init(void) -{ - return lib80211_register_crypto_ops(&lib80211_crypt_wep); -} - -static void __exit lib80211_crypto_wep_exit(void) -{ - lib80211_unregister_crypto_ops(&lib80211_crypt_wep); -} - -module_init(lib80211_crypto_wep_init); -module_exit(lib80211_crypto_wep_exit); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 4dac81854721..a5eb92d93074 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -340,12 +340,6 @@ cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a, return -EINVAL; } - if (ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_a) != - ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_b)) { - NL_SET_ERR_MSG(extack, "link EML medium sync delay mismatch"); - return -EINVAL; - } - if (ieee80211_mle_get_eml_cap((const u8 *)mle_a) != ieee80211_mle_get_eml_cap((const u8 *)mle_b)) { NL_SET_ERR_MSG(extack, "link EML capabilities mismatch"); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7d099f7118a..9d2edb71f981 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -829,6 +829,7 @@ static const struct nla_policy 
nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), [NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), [NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG }, + [NL80211_ATTR_VIF_RADIO_MASK] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -1285,10 +1286,7 @@ static unsigned int nl80211_link_id(struct nlattr **attrs) { struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID]; - if (!linkid) - return 0; - - return nla_get_u8(linkid); + return nla_get_u8_default(linkid, 0); } static int nl80211_link_id_or_invalid(struct nlattr **attrs) @@ -2430,6 +2428,11 @@ static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx) if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx)) goto nla_put_failure; + if (r->antenna_mask && + nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, + r->antenna_mask)) + goto nla_put_failure; + for (i = 0; i < r->n_freq_range; i++) { const struct wiphy_radio_freq_range *range = &r->freq_range[i]; @@ -3408,11 +3411,9 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); - if (attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]) - chandef->freq1_offset = nla_get_u32( - attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]); - else - chandef->freq1_offset = 0; + chandef->freq1_offset = + nla_get_u32_default(attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET], + 0); } if (attrs[NL80211_ATTR_CENTER_FREQ2]) chandef->center_freq2 = @@ -3561,7 +3562,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: return cfg80211_set_mesh_channel(rdev, wdev, &chandef); case NL80211_IFTYPE_MONITOR: - return cfg80211_set_monitor_channel(rdev, &chandef); + return cfg80211_set_monitor_channel(rdev, dev, &chandef); default: break; } @@ -3996,7 +3997,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2)) || - nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr)) + nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr) || + nla_put_u32(msg, NL80211_ATTR_VIF_RADIO_MASK, wdev->radio_mask)) goto nla_put_failure; if (rdev->ops->get_channel && !wdev->valid_links) { @@ -4199,6 +4201,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, + [NL80211_MNTR_FLAG_SKIP_TX] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) @@ -4312,6 +4315,29 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; } +static int nl80211_parse_vif_radio_mask(struct genl_info *info, + u32 *radio_mask) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct nlattr *attr = info->attrs[NL80211_ATTR_VIF_RADIO_MASK]; + u32 mask, allowed; + + if (!attr) { + *radio_mask = 0; + return 0; + } + + allowed = BIT(rdev->wiphy.n_radio) - 1; + mask = nla_get_u32(attr); + if (mask & ~allowed) + return -EINVAL; + if (!mask) + mask = allowed; + *radio_mask = mask; + + return 1; +} + static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4319,6 +4345,8 @@ static int 
nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) int err; enum nl80211_iftype otype, ntype; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + u32 radio_mask = 0; bool change = false; memset(¶ms, 0, sizeof(params)); @@ -4332,8 +4360,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_MESH_ID]) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; if (otype != NL80211_IFTYPE_MESH_POINT) @@ -4364,6 +4390,12 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (err > 0) change = true; + err = nl80211_parse_vif_radio_mask(info, &radio_mask); + if (err < 0) + return err; + if (err && netif_running(dev)) + return -EBUSY; + if (change) err = cfg80211_change_iface(rdev, dev, ntype, ¶ms); else @@ -4372,11 +4404,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (!err && params.use_4addr != -1) dev->ieee80211_ptr->use_4addr = params.use_4addr; - if (change && !err) { - struct wireless_dev *wdev = dev->ieee80211_ptr; + if (radio_mask) + wdev->radio_mask = radio_mask; + if (change && !err) nl80211_notify_iface(rdev, wdev, NL80211_CMD_SET_INTERFACE); - } return err; } @@ -4387,6 +4419,7 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct vif_params params; struct wireless_dev *wdev; struct sk_buff *msg; + u32 radio_mask; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; @@ -4424,6 +4457,10 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) if (err < 0) return err; + err = nl80211_parse_vif_radio_mask(info, &radio_mask); + if (err < 0) + return err; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -4465,6 +4502,9 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) break; } + if (radio_mask) + wdev->radio_mask = radio_mask; + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { nlmsg_free(msg); @@ -6078,6 +6118,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; + if (info->attrs[NL80211_ATTR_SMPS_MODE] && + nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]) != NL80211_SMPS_OFF) + return -EOPNOTSUPP; + params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; @@ -6227,34 +6271,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - if (info->attrs[NL80211_ATTR_SMPS_MODE]) { - params->smps_mode = - nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); - switch (params->smps_mode) { - case NL80211_SMPS_OFF: - break; - case NL80211_SMPS_STATIC: - if (!(rdev->wiphy.features & - NL80211_FEATURE_STATIC_SMPS)) { - err = -EINVAL; - goto out; - } - break; - case NL80211_SMPS_DYNAMIC: - if (!(rdev->wiphy.features & - NL80211_FEATURE_DYNAMIC_SMPS)) { - err = -EINVAL; - goto out; - } - break; - default: - err = -EINVAL; - goto out; - } - } else { - params->smps_mode = NL80211_SMPS_OFF; - } - params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { err = -EOPNOTSUPP; @@ -8244,11 +8260,9 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) if (unlikely(!rcu_access_pointer(cfg80211_regdomain))) return -EINPROGRESS; - if 
(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) - user_reg_hint_type = - nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); - else - user_reg_hint_type = NL80211_USER_REG_HINT_USER; + user_reg_hint_type = + nla_get_u32_default(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE], + NL80211_USER_REG_HINT_USER); switch (user_reg_hint_type) { case NL80211_USER_REG_HINT_USER: @@ -9180,6 +9194,9 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, lockdep_assert_wiphy(wdev->wiphy); + if (!cfg80211_wdev_channel_allowed(wdev, chan)) + return false; + if (!cfg80211_beaconing_iface_active(wdev)) return true; @@ -9392,7 +9409,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } /* ignore disabled channels */ - if (chan->flags & IEEE80211_CHAN_DISABLED) + if (chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(wdev, chan)) continue; request->channels[i] = chan; @@ -9412,7 +9430,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) chan = &wiphy->bands[band]->channels[j]; - if (chan->flags & IEEE80211_CHAN_DISABLED) + if (chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(wdev, chan)) continue; request->channels[i] = chan; @@ -9776,6 +9795,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request = kzalloc(size, GFP_KERNEL); if (!request) return ERR_PTR(-ENOMEM); + request->n_channels = n_channels; if (n_ssids) request->ssids = (void *)request + @@ -11061,11 +11081,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]); } - if (info->attrs[NL80211_ATTR_SAE_PWE]) - settings->sae_pwe = - nla_get_u8(info->attrs[NL80211_ATTR_SAE_PWE]); - else - settings->sae_pwe = NL80211_SAE_PWE_UNSPECIFIED; + settings->sae_pwe = + nla_get_u8_default(info->attrs[NL80211_ATTR_SAE_PWE], + NL80211_SAE_PWE_UNSPECIFIED); return 0; } @@ -12321,10 +12339,8 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid) return -EPERM; - if (!info->attrs[NL80211_ATTR_REASON_CODE]) - reason = WLAN_REASON_DEAUTH_LEAVING; - else - reason = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + reason = nla_get_u16_default(info->attrs[NL80211_ATTR_REASON_CODE], + WLAN_REASON_DEAUTH_LEAVING); if (reason == 0) return -EINVAL; @@ -12446,7 +12462,7 @@ static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) { pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); } else if (info->attrs[NL80211_ATTR_SSID]) { - /* SSID based pmksa flush suppported only for FILS, + /* SSID based pmksa flush supported only for FILS, * OWE/SAE OFFLOAD cases */ if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] && @@ -13670,10 +13686,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]); memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), ETH_ALEN); - if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) - port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); - else - port = 0; + port = nla_get_u16_default(tb[NL80211_WOWLAN_TCP_SRC_PORT], 0); #ifdef CONFIG_INET /* allocate a socket and port for it and use it */ err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, @@ -13884,11 +13897,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) pat_len < wowlan->pattern_min_len) goto error; - if 
(!pat_tb[NL80211_PKTPAT_OFFSET]) - pkt_offset = 0; - else - pkt_offset = nla_get_u32( - pat_tb[NL80211_PKTPAT_OFFSET]); + pkt_offset = + nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET], + 0); if (pkt_offset > wowlan->max_pkt_offset) goto error; new_triggers.patterns[i].pkt_offset = pkt_offset; @@ -14061,8 +14072,6 @@ void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce) for (i = 0; i < coalesce->n_rules; i++) { rule = &coalesce->rules[i]; - if (!rule) - continue; for (j = 0; j < rule->n_patterns; j++) kfree(rule->patterns[j].mask); kfree(rule->patterns); @@ -14134,10 +14143,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, pat_len < coalesce->pattern_min_len) return -EINVAL; - if (!pat_tb[NL80211_PKTPAT_OFFSET]) - pkt_offset = 0; - else - pkt_offset = nla_get_u32(pat_tb[NL80211_PKTPAT_OFFSET]); + pkt_offset = nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET], + 0); if (pkt_offset > coalesce->max_pkt_offset) return -EINVAL; new_rule->patterns[i].pkt_offset = pkt_offset; @@ -15498,7 +15505,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) if (tsid >= IEEE80211_FIRST_TSPEC_TSID) { /* TODO: handle 802.11 TSPEC/admission control * need more attributes for that (e.g. BA session requirement); - * change the WMM adminssion test above to allow both then + * change the WMM admission test above to allow both then */ return -EINVAL; } diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index b7e3e46ec16d..326faea38ca3 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -200,7 +200,7 @@ static void find_ns(struct ieee80211_radiotap_iterator *iterator, * present fields. @this_arg can be changed by the caller (eg, * incremented to move inside a compound argument like * IEEE80211_RADIOTAP_CHANNEL). The args pointed to are in - * little-endian format whatever the endianess of your CPU. + * little-endian format whatever the endianness of your CPU. 
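Editor's note: because radiotap argument fields are little-endian regardless of host byte order, and (per the alignment gotcha below) not necessarily aligned, portable consumers should assemble values byte by byte rather than dereferencing wider types through iterator.this_arg. A self-contained reader in that style:

#include <stdint.h>
#include <stdio.h>

/* Byte-wise little-endian load: correct on any endianness and safe
 * for unaligned pointers such as iterator.this_arg. */
static uint16_t get_le16(const void *p)
{
	const uint8_t *b = p;

	return (uint16_t)(b[0] | (b[1] << 8));
}

int main(void)
{
	/* A radiotap CHANNEL argument: freq 2437 (0x0985), flags 0x00a0. */
	const uint8_t arg[4] = { 0x85, 0x09, 0xa0, 0x00 };

	printf("freq=%u flags=0x%04x\n", get_le16(arg), get_le16(arg + 2));
	return 0;
}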
* * Alignment Gotcha: * You must take care when dereferencing iterator.this_arg diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index f5adbf6b5c84..adb6105bbb7d 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -445,11 +445,12 @@ rdev_libertas_set_mesh_channel(struct cfg80211_registered_device *rdev, static inline int rdev_set_monitor_channel(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct cfg80211_chan_def *chandef) { int ret; - trace_rdev_set_monitor_channel(&rdev->wiphy, chandef); - ret = rdev->ops->set_monitor_channel(&rdev->wiphy, chandef); + trace_rdev_set_monitor_channel(&rdev->wiphy, dev, chandef); + ret = rdev->ops->set_monitor_channel(&rdev->wiphy, dev, chandef); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6489ba943a63..1df65a5a44f7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1147,7 +1147,7 @@ static const struct ieee80211_regdomain *reg_get_regdomain(struct wiphy *wiphy) /* * Follow the driver's regulatory domain, if present, unless a country - * IE has been processed or a user wants to help complaince further + * IE has been processed or a user wants to help compliance further */ if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && lr->initiator != NL80211_REGDOM_SET_BY_USER && diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d0aed41ded2f..1c6fd45aa809 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -956,7 +956,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) struct ieee80211_channel *chan = ieee80211_get_channel(&rdev->wiphy, ap->center_freq); - if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) + if (!chan || chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(rdev_req->wdev, chan)) continue; for (i = 0; i < rdev_req->n_channels; i++) { @@ -3519,9 +3520,12 @@ int cfg80211_wext_siwscan(struct net_device *dev, continue; for (j = 0; j < wiphy->bands[band]->n_channels; j++) { + struct ieee80211_channel *chan; + /* ignore disabled channels */ - if (wiphy->bands[band]->channels[j].flags & - IEEE80211_CHAN_DISABLED) + chan = &wiphy->bands[band]->channels[j]; + if (chan->flags & IEEE80211_CHAN_DISABLED || + !cfg80211_wdev_channel_allowed(creq->wdev, chan)) continue; /* If we have a wireless request structure and the @@ -3598,7 +3602,6 @@ int cfg80211_wext_siwscan(struct net_device *dev, kfree(creq); return err; } -EXPORT_WEXT_HANDLER(cfg80211_wext_siwscan); static char *ieee80211_scan_add_ies(struct iw_request_info *info, const struct cfg80211_bss_ies *ies, @@ -3970,5 +3973,4 @@ int cfg80211_wext_giwscan(struct net_device *dev, return res; } -EXPORT_WEXT_HANDLER(cfg80211_wext_giwscan); #endif diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 97c21b627791..d5c9bb614fa6 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1318,19 +1318,21 @@ TRACE_EVENT(rdev_libertas_set_mesh_channel, ); TRACE_EVENT(rdev_set_monitor_channel, - TP_PROTO(struct wiphy *wiphy, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, chandef), + TP_ARGS(wiphy, netdev, chandef), TP_STRUCT__entry( WIPHY_ENTRY + NETDEV_ENTRY CHAN_DEF_ENTRY ), TP_fast_assign( WIPHY_ASSIGN; + NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) 
); TRACE_EVENT(rdev_auth, diff --git a/net/wireless/util.c b/net/wireless/util.c index f49b55724f83..040d62051eb9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -743,7 +743,7 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, return NULL; /* - * When reusing framents, copy some data to the head to simplify + * When reusing fragments, copy some data to the head to simplify * ethernet header handling and speed up protocol header processing * in the stack later. */ @@ -2923,3 +2923,32 @@ bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, return true; } EXPORT_SYMBOL(cfg80211_radio_chandef_valid); + +bool cfg80211_wdev_channel_allowed(struct wireless_dev *wdev, + struct ieee80211_channel *chan) +{ + struct wiphy *wiphy = wdev->wiphy; + const struct wiphy_radio *radio; + struct cfg80211_chan_def chandef; + u32 radio_mask; + int i; + + radio_mask = wdev->radio_mask; + if (!wiphy->n_radio || radio_mask == BIT(wiphy->n_radio) - 1) + return true; + + cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); + for (i = 0; i < wiphy->n_radio; i++) { + if (!(radio_mask & BIT(i))) + continue; + + radio = &wiphy->radio[i]; + if (!cfg80211_radio_chandef_valid(radio, &chandef)) + continue; + + return true; + } + + return false; +} +EXPORT_SYMBOL(cfg80211_wdev_channel_allowed); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 2371069f3c43..90d5c0592667 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -30,7 +30,6 @@ int cfg80211_wext_giwname(struct net_device *dev, strcpy(wrqu->name, "IEEE 802.11"); return 0; } -EXPORT_WEXT_HANDLER(cfg80211_wext_giwname); int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) @@ -69,7 +68,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, return ret; } -EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode); int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) @@ -105,7 +103,6 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, } return 0; } -EXPORT_WEXT_HANDLER(cfg80211_wext_giwmode); int cfg80211_wext_giwrange(struct net_device *dev, @@ -220,7 +217,6 @@ int cfg80211_wext_giwrange(struct net_device *dev, return 0; } -EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange); /** @@ -281,7 +277,6 @@ out: wiphy_unlock(&rdev->wiphy); return err; } -EXPORT_WEXT_HANDLER(cfg80211_wext_siwrts); int cfg80211_wext_giwrts(struct net_device *dev, struct iw_request_info *info, @@ -296,7 +291,6 @@ int cfg80211_wext_giwrts(struct net_device *dev, return 0; } -EXPORT_WEXT_HANDLER(cfg80211_wext_giwrts); int cfg80211_wext_siwfrag(struct net_device *dev, struct iw_request_info *info, @@ -327,7 +321,6 @@ out: return err; } -EXPORT_WEXT_HANDLER(cfg80211_wext_siwfrag); int cfg80211_wext_giwfrag(struct net_device *dev, struct iw_request_info *info, @@ -342,7 +335,6 @@ int cfg80211_wext_giwfrag(struct net_device *dev, return 0; } -EXPORT_WEXT_HANDLER(cfg80211_wext_giwfrag); static int cfg80211_wext_siwretry(struct net_device *dev, struct iw_request_info *info, @@ -413,7 +405,6 @@ int cfg80211_wext_giwretry(struct net_device *dev, return 0; } -EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry); static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, struct net_device *dev, bool pairwise, @@ -830,7 +821,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, ret = -EINVAL; break; } - ret = 
cfg80211_set_monitor_channel(rdev, &chandef);
+		ret = cfg80211_set_monitor_channel(rdev, dev, &chandef);
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
 		freq = cfg80211_wext_freq(wextfreq);
@@ -1204,7 +1195,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
 	switch (wrq->flags & IW_POWER_MODE) {
 	case IW_POWER_ON:       /* If not specified */
 	case IW_POWER_MODE:     /* If set all mask */
-	case IW_POWER_ALL_R:	/* If explicitely state all */
+	case IW_POWER_ALL_R:	/* If explicitly state all */
 		ps = true;
 		break;
 	default:                /* Otherwise we ignore */
diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h
index c02eb789e676..8251ca5df8ae 100644
--- a/net/wireless/wext-compat.h
+++ b/net/wireless/wext-compat.h
@@ -5,12 +5,6 @@
 #include <net/iw_handler.h>
 #include <linux/wireless.h>
 
-#ifdef CONFIG_CFG80211_WEXT_EXPORT
-#define EXPORT_WEXT_HANDLER(h) EXPORT_SYMBOL_GPL(h)
-#else
-#define EXPORT_WEXT_HANDLER(h)
-#endif /* CONFIG_CFG80211_WEXT_EXPORT */
-
 int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
 			       struct iw_request_info *info,
 			       struct iw_freq *wextfreq, char *extra);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 838ad6541a17..3bb04b05c5ce 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1159,7 +1159,7 @@ char *iwe_stream_add_event(struct iw_request_info *info, char *stream,
 	/* Check if it's possible */
 	if (likely((stream + event_len) < ends)) {
 		iwe->len = event_len;
-		/* Beware of alignement issues on 64 bits */
+		/* Beware of alignment issues on 64 bits */
 		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
 		memcpy(stream + lcp_len, &iwe->u, event_len - lcp_len);
diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
deleted file mode 100644
index b379a0371653..000000000000
--- a/net/wireless/wext-spy.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * This file implements the Wireless Extensions spy API.
- *
- * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
- *
- * (As all part of the Linux kernel, this file is GPL)
- */
-
-#include <linux/wireless.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/export.h>
-#include <net/iw_handler.h>
-#include <net/arp.h>
-#include <net/wext.h>
-
-static inline struct iw_spy_data *get_spydata(struct net_device *dev)
-{
-	/* This is the new way */
-	if (dev->wireless_data)
-		return dev->wireless_data->spy_data;
-	return NULL;
-}
-
-int iw_handler_set_spy(struct net_device *	dev,
-		       struct iw_request_info *	info,
-		       union iwreq_data *	wrqu,
-		       char *			extra)
-{
-	struct iw_spy_data *	spydata = get_spydata(dev);
-	struct sockaddr *	address = (struct sockaddr *) extra;
-
-	/* Make sure driver is not buggy or using the old API */
-	if (!spydata)
-		return -EOPNOTSUPP;
-
-	/* Disable spy collection while we copy the addresses.
-	 * While we copy addresses, any call to wireless_spy_update()
-	 * will NOP. This is OK, as anyway the addresses are changing. */
-	spydata->spy_number = 0;
-
-	/* We want to operate without locking, because wireless_spy_update()
-	 * most likely will happen in the interrupt handler, and therefore
-	 * have its own locking constraints and needs performance.
-	 * The rtnl_lock() makes sure we don't race with the other iw_handlers.
-	 * This makes sure wireless_spy_update() "sees" that the spy list
-	 * is temporarily disabled. */
-	smp_wmb();
-
-	/* Are there any addresses to copy?
*/ - if (wrqu->data.length > 0) { - int i; - - /* Copy addresses */ - for (i = 0; i < wrqu->data.length; i++) - memcpy(spydata->spy_address[i], address[i].sa_data, - ETH_ALEN); - /* Reset stats */ - memset(spydata->spy_stat, 0, - sizeof(struct iw_quality) * IW_MAX_SPY); - } - - /* Make sure above is updated before re-enabling */ - smp_wmb(); - - /* Enable addresses */ - spydata->spy_number = wrqu->data.length; - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_spy); - -int iw_handler_get_spy(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct sockaddr * address = (struct sockaddr *) extra; - int i; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - wrqu->data.length = spydata->spy_number; - - /* Copy addresses. */ - for (i = 0; i < spydata->spy_number; i++) { - memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); - address[i].sa_family = AF_UNIX; - } - /* Copy stats to the user buffer (just after). */ - if (spydata->spy_number > 0) - memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), - spydata->spy_stat, - sizeof(struct iw_quality) * spydata->spy_number); - /* Reset updated flags. */ - for (i = 0; i < spydata->spy_number; i++) - spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; - return 0; -} -EXPORT_SYMBOL(iw_handler_get_spy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : set spy threshold - */ -int iw_handler_set_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - spydata->spy_thr_low = threshold->low; - spydata->spy_thr_high = threshold->high; - - /* Clear flag */ - memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : get spy threshold - */ -int iw_handler_get_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - threshold->low = spydata->spy_thr_low; - threshold->high = spydata->spy_thr_high; - - return 0; -} -EXPORT_SYMBOL(iw_handler_get_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Prepare and send a Spy Threshold event - */ -static void iw_send_thrspy_event(struct net_device * dev, - struct iw_spy_data * spydata, - unsigned char * address, - struct iw_quality * wstats) -{ - union iwreq_data wrqu; - struct iw_thrspy threshold; - - /* Init */ - wrqu.data.length = 1; - wrqu.data.flags = 0; - /* Copy address */ - memcpy(threshold.addr.sa_data, address, ETH_ALEN); - threshold.addr.sa_family = ARPHRD_ETHER; - /* Copy stats */ - threshold.qual = *wstats; - /* Copy also thresholds */ - threshold.low = spydata->spy_thr_low; - threshold.high = spydata->spy_thr_high; - - /* Send event to user space */ - wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, 
(char *) &threshold); -} - -/* ---------------------------------------------------------------- */ -/* - * Call for the driver to update the spy data. - * For now, the spy data is a simple array. As the size of the array is - * small, this is good enough. If we wanted to support larger number of - * spy addresses, we should use something more efficient... - */ -void wireless_spy_update(struct net_device * dev, - unsigned char * address, - struct iw_quality * wstats) -{ - struct iw_spy_data * spydata = get_spydata(dev); - int i; - int match = -1; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return; - - /* Update all records that match */ - for (i = 0; i < spydata->spy_number; i++) - if (ether_addr_equal(address, spydata->spy_address[i])) { - memcpy(&(spydata->spy_stat[i]), wstats, - sizeof(struct iw_quality)); - match = i; - } - - /* Generate an event if we cross the spy threshold. - * To avoid event storms, we have a simple hysteresis : we generate - * event only when we go under the low threshold or above the - * high threshold. */ - if (match >= 0) { - if (spydata->spy_thr_under[match]) { - if (wstats->level > spydata->spy_thr_high.level) { - spydata->spy_thr_under[match] = 0; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } else { - if (wstats->level < spydata->spy_thr_low.level) { - spydata->spy_thr_under[match] = 1; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } - } -} -EXPORT_SYMBOL(wireless_spy_update); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 1140b2a120ca..3fa70286c846 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -141,7 +141,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len, u64 addr; int err; - addr = xp_get_handle(xskb); + addr = xp_get_handle(xskb, xskb->pool); err = xskq_prod_reserve_desc(xs->rx, addr, len, flags); if (err) { xs->rx_queue_full++; @@ -171,14 +171,14 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return 0; xskb_list = &xskb->pool->xskb_list; - list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { + list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { if (list_is_singular(xskb_list)) contd = 0; len = pos->xdp.data_end - pos->xdp.data; err = __xsk_rcv_zc(xs, pos, len, contd); if (err) goto err; - list_del(&pos->xskb_list_node); + list_del(&pos->list_node); } return 0; @@ -527,34 +527,34 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags) return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); } -static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr) +static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr) { unsigned long flags; int ret; - spin_lock_irqsave(&xs->pool->cq_lock, flags); - ret = xskq_prod_reserve_addr(xs->pool->cq, addr); - spin_unlock_irqrestore(&xs->pool->cq_lock, flags); + spin_lock_irqsave(&pool->cq_lock, flags); + ret = xskq_prod_reserve_addr(pool->cq, addr); + spin_unlock_irqrestore(&pool->cq_lock, flags); return ret; } -static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n) +static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n) { unsigned long flags; - spin_lock_irqsave(&xs->pool->cq_lock, flags); - xskq_prod_submit_n(xs->pool->cq, n); - spin_unlock_irqrestore(&xs->pool->cq_lock, flags); + spin_lock_irqsave(&pool->cq_lock, flags); + xskq_prod_submit_n(pool->cq, n); + spin_unlock_irqrestore(&pool->cq_lock, flags); } -static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n) +static void xsk_cq_cancel_locked(struct 
xsk_buff_pool *pool, u32 n) { unsigned long flags; - spin_lock_irqsave(&xs->pool->cq_lock, flags); - xskq_prod_cancel_n(xs->pool->cq, n); - spin_unlock_irqrestore(&xs->pool->cq_lock, flags); + spin_lock_irqsave(&pool->cq_lock, flags); + xskq_prod_cancel_n(pool->cq, n); + spin_unlock_irqrestore(&pool->cq_lock, flags); } static u32 xsk_get_num_desc(struct sk_buff *skb) @@ -571,7 +571,7 @@ static void xsk_destruct_skb(struct sk_buff *skb) *compl->tx_timestamp = ktime_get_tai_fast_ns(); } - xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb)); + xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb)); sock_wfree(skb); } @@ -587,7 +587,7 @@ static void xsk_consume_skb(struct sk_buff *skb) struct xdp_sock *xs = xdp_sk(skb->sk); skb->destructor = sock_wfree; - xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb)); + xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb)); /* Free skb without triggering the perf drop trace */ consume_skb(skb); xs->skb = NULL; @@ -675,6 +675,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, len = desc->len; if (!skb) { + first_frag = true; + hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom)); tr = dev->needed_tailroom; skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err); @@ -685,12 +687,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb_put(skb, len); err = skb_store_bits(skb, 0, buffer, len); - if (unlikely(err)) { - kfree_skb(skb); + if (unlikely(err)) goto free_err; - } - - first_frag = true; } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct page *page; @@ -758,6 +756,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, return skb; free_err: + if (first_frag && skb) + kfree_skb(skb); + if (err == -EOVERFLOW) { /* Drop the packet */ xsk_set_destructor_arg(xs->skb); @@ -765,7 +766,7 @@ free_err: xskq_cons_release(xs->tx); } else { /* Let application retry */ - xsk_cq_cancel_locked(xs, 1); + xsk_cq_cancel_locked(xs->pool, 1); } return ERR_PTR(err); @@ -802,7 +803,7 @@ static int __xsk_generic_xmit(struct sock *sk) * if there is space in it. This avoids having to implement * any buffering in the Tx path. 
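Editor's note: the comment above describes a reserve-then-commit protocol that this series reworks to key on the buffer pool instead of the socket: a completion-queue slot is reserved before the skb is built, cancelled if building fails so the application can retry, and submitted from the skb destructor once transmission completes. A toy model of that bookkeeping (plain counters standing in for the locked producer ring):

#include <stdbool.h>
#include <stdio.h>

/* Toy completion queue: cached_prod counts reserved slots, prod counts
 * completions already published to the application. */
struct toy_cq {
	unsigned int prod;
	unsigned int cached_prod;
	unsigned int size;
};

static bool cq_reserve(struct toy_cq *q)	/* cf. xsk_cq_reserve_addr_locked() */
{
	if (q->cached_prod - q->prod == q->size)
		return false;		/* ring full: caller backs off */
	q->cached_prod++;
	return true;
}

static void cq_cancel(struct toy_cq *q, unsigned int n)	/* cf. xsk_cq_cancel_locked() */
{
	q->cached_prod -= n;	/* give the slot back for a retry */
}

static void cq_submit(struct toy_cq *q, unsigned int n)	/* cf. xsk_cq_submit_locked() */
{
	q->prod += n;		/* publish the completion */
}

static void send_one(struct toy_cq *q, bool build_fails)
{
	if (!cq_reserve(q))
		return;			/* no CQ space: do not build at all */
	if (build_fails) {
		cq_cancel(q, 1);	/* xsk_build_skb() error path */
		return;
	}
	cq_submit(q, 1);		/* normally runs from xsk_destruct_skb() */
}

int main(void)
{
	struct toy_cq q = { .prod = 0, .cached_prod = 0, .size = 4 };

	send_one(&q, false);
	send_one(&q, true);
	printf("prod=%u cached_prod=%u\n", q.prod, q.cached_prod); /* 1 1 */
	return 0;
}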
		 */
-		if (xsk_cq_reserve_addr_locked(xs, desc.addr))
+		if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr))
 			goto out;
 
 		skb = xsk_build_skb(xs, &desc);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 521a2938e50a..ae71da7d2cd6 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -101,8 +101,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 		xskb = &pool->heads[i];
 		xskb->pool = pool;
 		xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
-		INIT_LIST_HEAD(&xskb->free_list_node);
-		INIT_LIST_HEAD(&xskb->xskb_list_node);
+		INIT_LIST_HEAD(&xskb->list_node);
 		if (pool->unaligned)
 			pool->free_heads[i] = xskb;
 		else
@@ -230,6 +229,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
 			goto err_unreg_xsk;
 	}
 	pool->umem->zc = true;
+	pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs;
 	return 0;
 
 err_unreg_xsk:
@@ -417,8 +417,10 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
 
 	for (i = 0; i < pool->heads_cnt; i++) {
 		struct xdp_buff_xsk *xskb = &pool->heads[i];
+		u64 orig_addr;
 
-		xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+		orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom;
+		xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr);
 	}
 }
 
@@ -501,6 +503,22 @@ static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
 	return *addr < pool->addrs_cnt;
 }
 
+static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr)
+{
+	struct xdp_buff_xsk *xskb;
+
+	if (pool->unaligned) {
+		xskb = pool->free_heads[--pool->free_heads_cnt];
+		xp_init_xskb_addr(xskb, pool, addr);
+		if (pool->dma_pages)
+			xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+	} else {
+		xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+	}
+
+	return xskb;
+}
+
 static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
 {
 	struct xdp_buff_xsk *xskb;
@@ -526,14 +544,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
 			break;
 	}
 
-	if (pool->unaligned) {
-		xskb = pool->free_heads[--pool->free_heads_cnt];
-		xp_init_xskb_addr(xskb, pool, addr);
-		if (pool->dma_pages)
-			xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
-	} else {
-		xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
-	}
+	xskb = xp_get_xskb(pool, addr);
 
 	xskq_cons_release(pool->fq);
 	return xskb;
@@ -550,8 +561,8 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
 	} else {
 		pool->free_list_cnt--;
 		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
-					free_list_node);
-		list_del_init(&xskb->free_list_node);
+					list_node);
+		list_del_init(&xskb->list_node);
 	}
 
 	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
@@ -591,14 +602,7 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xd
 			continue;
 		}
 
-		if (pool->unaligned) {
-			xskb = pool->free_heads[--pool->free_heads_cnt];
-			xp_init_xskb_addr(xskb, pool, addr);
-			if (pool->dma_pages)
-				xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
-		} else {
-			xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
-		}
+		xskb = xp_get_xskb(pool, addr);
 
 		*xdp = &xskb->xdp;
 		xdp++;
@@ -617,8 +621,8 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
 
 	i = nb_entries;
 	while (i--) {
-		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
-		list_del_init(&xskb->free_list_node);
+		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node);
+		list_del_init(&xskb->list_node);
 
 		*xdp = &xskb->xdp;
 		xdp++;
@@ -688,11 +692,11 @@ EXPORT_SYMBOL(xp_can_alloc);
 
 void xp_free(struct xdp_buff_xsk *xskb)
 {
-	if (!list_empty(&xskb->free_list_node))
+	if (!list_empty(&xskb->list_node))
 		return;
 
 	xskb->pool->free_list_cnt++;
-	list_add(&xskb->free_list_node, &xskb->pool->free_list);
+	list_add(&xskb->list_node, &xskb->pool->free_list);
 }
 EXPORT_SYMBOL(xp_free);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 406b20dfee8d..46d87e961ad6 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -260,7 +260,7 @@ u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
 			nr_frags = 0;
 		} else {
 			nr_frags++;
-			if (nr_frags == pool->netdev->xdp_zc_max_segs) {
+			if (nr_frags == pool->xdp_zc_max_segs) {
 				nr_frags = 0;
 				break;
 			}
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 91357ccaf4af..5b9ee63e30b6 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -132,6 +132,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
 	[XFRMA_MTIMER_THRESH]	= { .type = NLA_U32 },
 	[XFRMA_SA_DIR]		= NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
 	[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+	[XFRMA_SA_PCPU]		= { .type = NLA_U32 },
 };
 
 static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -282,9 +283,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
 	case XFRMA_MTIMER_THRESH:
 	case XFRMA_SA_DIR:
 	case XFRMA_NAT_KEEPALIVE_INTERVAL:
+	case XFRMA_SA_PCPU:
 		return xfrm_nla_cpy(dst, src, nla_len(src));
 	default:
-		BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+		BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
 		pr_warn_once("unsupported nla_type %d\n", src->nla_type);
 		return -EOPNOTSUPP;
 	}
@@ -439,7 +441,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
 	int err;
 
 	if (type > XFRMA_MAX) {
-		BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+		BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
 		NL_SET_ERR_MSG(extack, "Bad attribute");
 		return -EOPNOTSUPP;
 	}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 749e7eea99e4..841a60a6fbfe 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -572,7 +572,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop;
 		}
 
-		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+		x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
 		if (x == NULL) {
 			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a2ea9dbac90b..4408c11c0835 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -289,7 +289,7 @@ struct dst_entry *__xfrm_dst_lookup(int family,
 EXPORT_SYMBOL(__xfrm_dst_lookup);
 
 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
-						int tos, int oif,
+						dscp_t dscp, int oif,
 						xfrm_address_t *prev_saddr,
 						xfrm_address_t *prev_daddr,
 						int family, u32 mark)
@@ -312,7 +312,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
 	params.net = net;
 	params.saddr = saddr;
 	params.daddr = daddr;
-	params.tos = tos;
+	params.dscp = dscp;
 	params.oif = oif;
 	params.mark = mark;
 	params.ipproto = x->id.proto;
@@ -434,6 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 	if (policy) {
 		write_pnet(&policy->xp_net, net);
 		INIT_LIST_HEAD(&policy->walk.all);
+		INIT_HLIST_HEAD(&policy->state_cache_list);
 		INIT_HLIST_NODE(&policy->bydst);
 		INIT_HLIST_NODE(&policy->byidx);
 		rwlock_init(&policy->lock);
@@ -475,6 +476,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
 
 static void xfrm_policy_kill(struct xfrm_policy *policy)
 {
+	struct net *net = xp_net(policy);
+	struct xfrm_state *x;
+
 	xfrm_dev_policy_delete(policy);
 
 	write_lock_bh(&policy->lock);
@@ -490,6 +494,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 	if (del_timer(&policy->timer))
 		xfrm_pol_put(policy);
 
+	/* XXX: Flush state cache */
+	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+		hlist_del_init_rcu(&x->state_cache);
+	}
+	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
 	xfrm_pol_put(policy);
 }
 
@@ -2576,10 +2587,10 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
 
 }
 
-static int xfrm_get_tos(const struct flowi *fl, int family)
+static dscp_t xfrm_get_dscp(const struct flowi *fl, int family)
 {
 	if (family == AF_INET)
-		return fl->u.ip4.flowi4_tos & INET_DSCP_MASK;
+		return inet_dsfield_to_dscp(fl->u.ip4.flowi4_tos);
 
 	return 0;
 }
@@ -2667,13 +2678,13 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	int header_len = 0;
 	int nfheader_len = 0;
 	int trailer_len = 0;
-	int tos;
 	int family = policy->selector.family;
 	xfrm_address_t saddr, daddr;
+	dscp_t dscp;
 
 	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
 
-	tos = xfrm_get_tos(fl, family);
+	dscp = xfrm_get_dscp(fl, family);
 
 	dst_hold(dst);
 
@@ -2721,8 +2732,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 
 		family = xfrm[i]->props.family;
 		oif = fl->flowi_oif ? : fl->flowi_l3mdev;
-		dst = xfrm_dst_lookup(xfrm[i], tos, oif,
-				      &saddr, &daddr, family, mark);
+		dst = xfrm_dst_lookup(xfrm[i], dscp, oif, &saddr,
+				      &daddr, family, mark);
 		err = PTR_ERR(dst);
 		if (IS_ERR(dst))
 			goto put_states;
@@ -3275,6 +3286,7 @@ no_transform:
 		dst_release(dst);
 		dst = dst_orig;
 	}
+ok:
 	xfrm_pols_put(pols, drop_pols);
 	if (dst && dst->xfrm &&
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 37478d36a8df..67ca7ac955a3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -665,6 +665,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 		refcount_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
 		INIT_LIST_HEAD(&x->km.all);
+		INIT_HLIST_NODE(&x->state_cache);
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
@@ -679,6 +680,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 		x->lft.hard_packet_limit = XFRM_INF;
 		x->replay_maxage = 0;
 		x->replay_maxdiff = 0;
+		x->pcpu_num = UINT_MAX;
 		spin_lock_init(&x->lock);
 	}
 	return x;
@@ -743,12 +745,18 @@ int __xfrm_state_delete(struct xfrm_state *x)
 
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
+
 		spin_lock(&net->xfrm.xfrm_state_lock);
 		list_del(&x->km.all);
 		hlist_del_rcu(&x->bydst);
 		hlist_del_rcu(&x->bysrc);
 		if (x->km.seq)
 			hlist_del_rcu(&x->byseq);
+		if (!hlist_unhashed(&x->state_cache))
+			hlist_del_rcu(&x->state_cache);
+		if (!hlist_unhashed(&x->state_cache_input))
+			hlist_del_rcu(&x->state_cache_input);
+
 		if (x->id.spi)
 			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
@@ -1101,6 +1109,52 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 	return NULL;
 }
 
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+					   const xfrm_address_t *daddr,
+					   __be32 spi, u8 proto,
+					   unsigned short family)
+{
+	struct hlist_head *state_cache_input;
+	struct xfrm_state *x = NULL;
+	int cpu = get_cpu();
+
+	state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+		if (x->props.family != family ||
+		    x->id.spi != spi ||
+		    x->id.proto != proto ||
+		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
+			continue;
+
+		if ((mark & x->mark.m) != x->mark.v)
+			continue;
+		if (!xfrm_state_hold_rcu(x))
+			continue;
+		goto out;
+	}
+
+	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+
+	if (x && x->km.state == XFRM_STATE_VALID) {
+		spin_lock_bh(&net->xfrm.xfrm_state_lock);
+		if (hlist_unhashed(&x->state_cache_input)) {
+			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+		} else {
+			hlist_del_rcu(&x->state_cache_input);
+			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+		}
+		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+
+out:
+	rcu_read_unlock();
+	put_cpu();
+	return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 						     const xfrm_address_t *daddr,
 						     const xfrm_address_t *saddr,
@@ -1155,6 +1209,12 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 			       struct xfrm_state **best, int *acq_in_progress,
 			       int *error)
 {
+	/* We need the cpu id just as a lookup key,
+	 * we don't require it to be stable.
+	 */
+	unsigned int pcpu_id = get_cpu();
+	put_cpu();
+
 	/* Resolution logic:
 	 * 1. There is a valid state with matching selector. Done.
 	 * 2. Valid state with inappropriate selector. Skip.
@@ -1174,13 +1234,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 					    &fl->u.__fl_common))
 			return;
 
+		if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
+			return;
+
 		if (!*best ||
+		    ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
 		    (*best)->km.dying > x->km.dying ||
 		    ((*best)->km.dying == x->km.dying &&
 		     (*best)->curlft.add_time < x->curlft.add_time))
 			*best = x;
 	} else if (x->km.state == XFRM_STATE_ACQ) {
-		*acq_in_progress = 1;
+		if (!*best || x->pcpu_num == pcpu_id)
+			*acq_in_progress = 1;
 	} else if (x->km.state == XFRM_STATE_ERROR ||
 		   x->km.state == XFRM_STATE_EXPIRED) {
 		if ((!x->sel.family ||
@@ -1209,12 +1274,60 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	unsigned short encap_family = tmpl->encap_family;
 	unsigned int sequence;
 	struct km_event c;
+	unsigned int pcpu_id;
+	bool cached = false;
+
+	/* We need the cpu id just as a lookup key,
+	 * we don't require it to be stable.
+	 */
+	pcpu_id = get_cpu();
+	put_cpu();
 
 	to_put = NULL;
 
 	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
 
 	rcu_read_lock();
+	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+		if (x->props.family == encap_family &&
+		    x->props.reqid == tmpl->reqid &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
+		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+			xfrm_state_look_at(pol, x, fl, encap_family,
+					   &best, &acquire_in_progress, &error);
+	}
+
+	if (best)
+		goto cached;
+
+	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+		if (x->props.family == encap_family &&
+		    x->props.reqid == tmpl->reqid &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
+		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+			xfrm_state_look_at(pol, x, fl, family,
+					   &best, &acquire_in_progress, &error);
+	}
+
+cached:
+	cached = true;
+	if (best)
+		goto found;
+	else if (error)
+		best = NULL;
+	else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+		WARN_ON(1);
+
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 #ifdef CONFIG_XFRM_OFFLOAD
@@ -1282,7 +1395,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	}
 
 found:
-	x = best;
+	if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
+	    (best && (best->pcpu_num == pcpu_id)))
+		x = best;
+
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
 		    (x0 = __xfrm_state_lookup_all(net, mark, daddr,
@@ -1314,6 +1430,8 @@ found:
 		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
 		x->if_id = if_id;
+		if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
+			x->pcpu_num = pcpu_id;
 
 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
 		if (error) {
@@ -1352,6 +1470,7 @@ found:
 			x->km.state = XFRM_STATE_ACQ;
 			x->dir = XFRM_SA_DIR_OUT;
 			list_add(&x->km.all, &net->xfrm.state_all);
+			h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 			XFRM_STATE_INSERT(bydst, &x->bydst,
 					  net->xfrm.state_bydst + h,
 					  x->xso.type);
@@ -1359,6 +1478,7 @@ found:
 			XFRM_STATE_INSERT(bysrc, &x->bysrc,
 					  net->xfrm.state_bysrc + h,
 					  x->xso.type);
+			INIT_HLIST_NODE(&x->state_cache);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
 				XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1392,6 +1512,11 @@ found:
 			x = NULL;
 			error = -ESRCH;
 		}
+
+		/* Use the already installed 'fallback' while the CPU-specific
+		 * SA acquire is handled*/
+		if (best)
+			x = best;
 	}
 out:
 	if (x) {
@@ -1402,6 +1527,15 @@ out:
 	} else {
 		*err = acquire_in_progress ? -EAGAIN : error;
 	}
+
+	if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+	    (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+		spin_lock_bh(&net->xfrm.xfrm_state_lock);
+		if (hlist_unhashed(&x->state_cache))
+			hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+
 	rcu_read_unlock();
 	if (to_put)
 		xfrm_state_put(to_put);
@@ -1524,12 +1658,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 	unsigned int h;
 	u32 mark = xnew->mark.v & xnew->mark.m;
 	u32 if_id = xnew->if_id;
+	u32 cpu_id = xnew->pcpu_num;
 
 	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
 	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == reqid &&
 		    x->if_id == if_id &&
+		    x->pcpu_num == cpu_id &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
 		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1552,7 +1688,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
 static struct xfrm_state *__find_acq_core(struct net *net,
 					  const struct xfrm_mark *m,
 					  unsigned short family, u8 mode,
-					  u32 reqid, u32 if_id, u8 proto,
+					  u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
 					  const xfrm_address_t *daddr,
 					  const xfrm_address_t *saddr,
 					  int create)
@@ -1569,6 +1705,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 		    x->id.spi != 0 ||
 		    x->id.proto != proto ||
 		    (mark & x->mark.m) != x->mark.v ||
+		    x->pcpu_num != pcpu_num ||
 		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
 		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
 			continue;
@@ -1602,6 +1739,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 			break;
 		}
 
+		x->pcpu_num = pcpu_num;
 		x->km.state = XFRM_STATE_ACQ;
 		x->id.proto = proto;
 		x->props.family = family;
@@ -1630,7 +1768,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 	return x;
 }
 
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
@@ -1656,7 +1794,7 @@ int xfrm_state_add(struct xfrm_state *x)
 	}
 
 	if (use_spi && x->km.seq) {
-		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
+		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
 		if (x1 && ((x1->id.proto != x->id.proto) ||
 		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
 			to_put = x1;
@@ -1666,7 +1804,7 @@ int xfrm_state_add(struct xfrm_state *x)
 
 	if (use_spi && !x1)
 		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
-				     x->props.reqid, x->if_id, x->id.proto,
+				     x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
 				     &x->id.daddr, &x->props.saddr, 0);
 
 	__xfrm_state_bump_genids(x);
@@ -1791,6 +1929,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 
 	x->props.flags = orig->props.flags;
 	x->props.extra_flags = orig->props.extra_flags;
+	x->pcpu_num = orig->pcpu_num;
 	x->if_id = orig->if_id;
 	x->tfcpad = orig->tfcpad;
 	x->replay_maxdiff = orig->replay_maxdiff;
@@ -2066,13 +2205,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
-	      u32 if_id, u8 proto, const xfrm_address_t *daddr,
+	      u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
 	      const xfrm_address_t *saddr, int create, unsigned short family)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
+	x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
+			    proto, daddr, saddr, create);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	return x;
@@ -2207,7 +2347,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 
 /* Silly enough, but I'm lazy to build resolution list */
 
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
 {
 	unsigned int h = xfrm_seq_hash(net, seq);
 	struct xfrm_state *x;
@@ -2215,6 +2355,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
 	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
 		if (x->km.seq == seq &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->pcpu_num == pcpu_num &&
 		    x->km.state == XFRM_STATE_ACQ) {
 			xfrm_state_hold(x);
 			return x;
@@ -2224,12 +2365,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
 	return NULL;
 }
 
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	x = __xfrm_find_acq_byseq(net, mark, seq);
+	x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	return x;
 }
@@ -2988,6 +3129,11 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_byseq)
 		goto out_byseq;
+
+	net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+	if (!net->xfrm.state_cache_input)
+		goto out_state_cache_input;
+
 	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	net->xfrm.state_num = 0;
@@ -2997,6 +3143,8 @@ int __net_init xfrm_state_init(struct net *net)
 			       &net->xfrm.xfrm_state_lock);
 	return 0;
 
+out_state_cache_input:
+	xfrm_hash_free(net->xfrm.state_byseq, sz);
 out_byseq:
 	xfrm_hash_free(net->xfrm.state_byspi, sz);
 out_byspi:
@@ -3026,6 +3174,7 @@ void xfrm_state_fini(struct net *net)
 	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
 	xfrm_hash_free(net->xfrm.state_bydst, sz);
+	free_percpu(net->xfrm.state_cache_input);
 }
 
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e3b8ce89831a..b2876e09328b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -200,7 +200,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			     struct netlink_ext_ack *extack)
 {
 	int err;
-	u8 sa_dir = attrs[XFRMA_SA_DIR] ? nla_get_u8(attrs[XFRMA_SA_DIR]) : 0;
+	u8 sa_dir = nla_get_u8_default(attrs[XFRMA_SA_DIR], 0);
 	u16 family = p->sel.family;
 
 	err = -EINVAL;
@@ -460,6 +466,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		}
 	}
 
+	if (!sa_dir && attrs[XFRMA_SA_PCPU]) {
+		NL_SET_ERR_MSG(extack, "SA_PCPU only supported with SA_DIR");
+		err = -EINVAL;
+		goto out;
+	}
+
 out:
 	return err;
 }
@@ -767,10 +773,8 @@ static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
 {
 	if (attrs[XFRMA_SET_MARK]) {
 		m->v = nla_get_u32(attrs[XFRMA_SET_MARK]);
-		if (attrs[XFRMA_SET_MARK_MASK])
-			m->m = nla_get_u32(attrs[XFRMA_SET_MARK_MASK]);
-		else
-			m->m = 0xffffffff;
+		m->m = nla_get_u32_default(attrs[XFRMA_SET_MARK_MASK],
+					   0xffffffff);
 	} else {
 		m->v = m->m = 0;
 	}
@@ -841,6 +845,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 		x->nat_keepalive_interval =
 			nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]);
 
+	if (attrs[XFRMA_SA_PCPU]) {
+		x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+		if (x->pcpu_num >= num_possible_cpus())
+			goto error;
+	}
+
 	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
 	if (err)
 		goto error;
@@ -1089,7 +1099,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
 	if (!nla)
 		return -EMSGSIZE;
 	algo = nla_data(nla);
-	strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
+	strscpy_pad(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
 
 	if (redact_secret && auth->alg_key_len)
 		memset(algo->alg_key, 0, (auth->alg_key_len + 7) / 8);
@@ -1296,6 +1306,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 		if (ret)
 			goto out;
 	}
+	if (x->pcpu_num != UINT_MAX) {
+		ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+		if (ret)
+			goto out;
+	}
 	if (x->dir)
 		ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -1700,6 +1715,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	u32 mark;
 	struct xfrm_mark m;
 	u32 if_id = 0;
+	u32 pcpu_num = UINT_MAX;
 
 	p = nlmsg_data(nlh);
 	err = verify_spi_info(p->info.id.proto, p->min, p->max, extack);
@@ -1716,8 +1732,16 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (attrs[XFRMA_IF_ID])
 		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
 
+	if (attrs[XFRMA_SA_PCPU]) {
+		pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+		if (pcpu_num >= num_possible_cpus()) {
+			err = -EINVAL;
+			goto out_noput;
+		}
+	}
+
 	if (p->info.seq) {
-		x = xfrm_find_acq_byseq(net, mark, p->info.seq);
+		x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num);
 		if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
@@ -1726,7 +1750,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (!x)
 		x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
-				  if_id, p->info.id.proto, daddr,
+				  if_id, pcpu_num, p->info.id.proto, daddr,
 				  &p->info.saddr, 1, family);
 	err = -ENOENT;
@@ -2526,7 +2550,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
 	       + nla_total_size(sizeof(struct xfrm_mark))
 	       + nla_total_size(4) /* XFRM_AE_RTHR */
 	       + nla_total_size(4) /* XFRM_AE_ETHR */
-	       + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */
+	       + nla_total_size(sizeof(x->dir)) /* XFRMA_SA_DIR */
+	       + nla_total_size(4); /* XFRMA_SA_PCPU */
 }
 
 static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -2582,6 +2607,11 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
 	err = xfrm_if_id_put(skb, x->if_id);
 	if (err)
 		goto out_cancel;
+	if (x->pcpu_num != UINT_MAX) {
+		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+		if (err)
+			goto out_cancel;
+	}
 
 	if (x->dir) {
 		err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -2852,6 +2882,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	xfrm_mark_get(attrs, &mark);
 
+	if (attrs[XFRMA_SA_PCPU]) {
+		x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+		err = -EINVAL;
+		if (x->pcpu_num >= num_possible_cpus())
+			goto free_state;
+	}
+
 	err = verify_newpolicy_info(&ua->policy, extack);
 	if (err)
 		goto free_state;
@@ -3182,6 +3219,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MTIMER_THRESH]	= { .type = NLA_U32 },
 	[XFRMA_SA_DIR]		= NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
 	[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+	[XFRMA_SA_PCPU]		= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL_GPL(xfrma_policy);
 
@@ -3245,6 +3283,20 @@ static int xfrm_reject_unused_attr(int type, struct nlattr **attrs,
 		}
 	}
 
+	if (attrs[XFRMA_SA_PCPU]) {
+		switch (type) {
+		case XFRM_MSG_NEWSA:
+		case XFRM_MSG_UPDSA:
+		case XFRM_MSG_ALLOCSPI:
+		case XFRM_MSG_ACQUIRE:
+
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Invalid attribute SA_PCPU");
+			return -EINVAL;
+		}
+	}
+
 	return 0;
 }
 
@@ -3348,7 +3400,8 @@ static inline unsigned int xfrm_expire_msgsize(void)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) +
 	       nla_total_size(sizeof(struct xfrm_mark)) +
-	       nla_total_size(sizeof_field(struct xfrm_state, dir));
+	       nla_total_size(sizeof_field(struct xfrm_state, dir)) +
+	       nla_total_size(4); /* XFRMA_SA_PCPU */
 }
 
 static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -3374,6 +3427,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
 	err = xfrm_if_id_put(skb, x->if_id);
 	if (err)
 		return err;
+	if (x->pcpu_num != UINT_MAX) {
+		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+		if (err)
+			return err;
+	}
 
 	if (x->dir) {
 		err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -3481,6 +3539,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
 	}
 	if (x->if_id)
 		l += nla_total_size(sizeof(x->if_id));
+	if (x->pcpu_num)
+		l += nla_total_size(sizeof(x->pcpu_num));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size_64bit(sizeof(u64));
@@ -3587,6 +3647,7 @@ static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
 	       + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
 	       + nla_total_size(sizeof(struct xfrm_mark))
 	       + nla_total_size(xfrm_user_sec_ctx_size(x->security))
+	       + nla_total_size(4) /* XFRMA_SA_PCPU */
 	       + userpolicy_type_attrsize();
 }
 
@@ -3623,6 +3684,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 		err = xfrm_if_id_put(skb, xp->if_id);
 	if (!err && xp->xdo.dev)
 		err = copy_user_offload(&xp->xdo, skb);
+	if (!err && x->pcpu_num != UINT_MAX)
+		err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
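
The xsk_buff_pool hunks above fold the two embedded nodes (free_list_node and xskb_list_node) into a single list_node: an xskb sits on at most one list at a time, so one node suffices, and xp_free() relies on list_empty() to detect an xskb that is already on the free list. A minimal standalone sketch of that idiom follows, with a hand-rolled intrusive list standing in for the kernel's <linux/list.h>; struct buf and the list names are illustrative, not kernel types.

/* Sketch: one embedded node serving two mutually exclusive lists. */
#include <assert.h>

struct list_node {
	struct list_node *prev, *next;
};

static void list_init(struct list_node *n) { n->prev = n->next = n; }
static int  list_empty(const struct list_node *n) { return n->next == n; }

static void list_add(struct list_node *n, struct list_node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_del_init(struct list_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);
}

struct buf {
	int id;
	struct list_node node;	/* on the free list OR the tx list, never both */
};

int main(void)
{
	struct list_node free_list, tx_list;
	struct buf b = { .id = 1 };

	list_init(&free_list);
	list_init(&tx_list);
	list_init(&b.node);

	list_add(&b.node, &free_list);	/* like xp_free(): enqueue on free list */
	assert(!list_empty(&b.node));	/* node emptiness == list membership */

	list_del_init(&b.node);		/* taken off the free list... */
	list_add(&b.node, &tx_list);	/* ...so the same node can serve tx */
	assert(free_list.next == &free_list);
	return 0;
}

Using list_del_init() rather than a plain unlink is what keeps the emptiness test meaningful afterwards; that is the same reason xp_free() can bail out early on a double free.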
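
The xfrm_policy.c hunks switch xfrm_dst_lookup() from a raw int tos to dscp_t, with inet_dsfield_to_dscp() masking off the ECN bits at the conversion point. In the kernel, dscp_t is a sparse __bitwise typedef; the sketch below is only a plain-C approximation of the same compile-time barrier, using a one-member struct so a raw TOS byte cannot be passed where a DSCP is expected without going through the masking conversion. Names follow the kernel loosely; this is not the kernel definition.

/* Sketch: a distinct DSCP type that can only be built via the mask. */
#include <stdint.h>
#include <stdio.h>

#define INET_DSCP_MASK 0xfc	/* upper six bits; the low two are ECN */

typedef struct { uint8_t value; } dscp_t;

/* the only constructor: strips the ECN bits */
static dscp_t inet_dsfield_to_dscp(uint8_t dsfield)
{
	return (dscp_t){ .value = (uint8_t)(dsfield & INET_DSCP_MASK) };
}

static void route_lookup(dscp_t dscp)
{
	printf("routing with dscp 0x%02x\n", dscp.value);
}

int main(void)
{
	uint8_t tos = 0x2f;	/* DSCP 0x2c with ECN bits set */

	route_lookup(inet_dsfield_to_dscp(tos));	/* explicit conversion */
	/* route_lookup(tos); */ /* would not compile: wrong type */
	return 0;
}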
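
xfrm_input_state_lookup() above puts a per-CPU hlist in front of the global __xfrm_state_lookup() hash walk and re-inserts hits at the head, so a steady single-flow-per-CPU receive path skips the shared hash entirely. A compressed userspace model of that shape follows: one cache slot per CPU instead of an hlist, no RCU or locking, and struct sa, slow_lookup() and NCPU are all stand-ins invented for the sketch, not kernel structures.

/* Sketch: per-CPU front cache over a slow global lookup. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NCPU 64

struct sa {
	uint32_t   spi;
	uint8_t    proto;
	const char *daddr;
};

static struct sa sadb[] = {
	{ 0x100, 50, "192.0.2.1" },
	{ 0x200, 50, "192.0.2.2" },
};

static struct sa *pcpu_cache[NCPU];	/* one slot per CPU, for brevity */

/* stand-in for the global __xfrm_state_lookup() hash walk */
static struct sa *slow_lookup(uint32_t spi, uint8_t proto, const char *daddr)
{
	for (size_t i = 0; i < sizeof(sadb) / sizeof(sadb[0]); i++)
		if (sadb[i].spi == spi && sadb[i].proto == proto &&
		    !strcmp(sadb[i].daddr, daddr))
			return &sadb[i];
	return NULL;
}

static struct sa *input_state_lookup(uint32_t spi, uint8_t proto,
				     const char *daddr)
{
	/* the CPU id is only a cache index; it need not stay stable */
	int cpu = sched_getcpu();
	struct sa *x;

	if (cpu < 0)
		cpu = 0;
	cpu %= NCPU;

	x = pcpu_cache[cpu];
	if (x && x->spi == spi && x->proto == proto && !strcmp(x->daddr, daddr))
		return x;			/* fast path: local cache hit */

	x = slow_lookup(spi, proto, daddr);	/* miss: fall back to the hash */
	if (x)
		pcpu_cache[cpu] = x;		/* promote for the next packet */
	return x;
}

int main(void)
{
	/* the second call for the same flow is served from this CPU's slot */
	for (int i = 0; i < 2; i++) {
		struct sa *x = input_state_lookup(0x100, 50, "192.0.2.1");

		printf("lookup %d: %s\n", i, x ? "found" : "missing");
	}
	return 0;
}

The real code keeps the cache entries live under RCU and prunes them in __xfrm_state_delete(); the sketch omits all of that and only shows the lookup-then-promote flow.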
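
The xfrm_state_find()/xfrm_state_look_at() changes teach SA selection about x->pcpu_num: an SA bound to a different CPU is never considered, a CPU-local SA outranks the UINT_MAX wildcard, and the wildcard serves as the fallback while a CPU-specific acquire is still outstanding. The selection rule in isolation, as a hedged sketch with an invented candidate array; it deliberately ignores the km.dying/add_time tie-breakers and the acquire machinery.

/* Sketch: prefer a CPU-bound SA, fall back to the wildcard. */
#include <stdint.h>
#include <stdio.h>

#define PCPU_NONE UINT32_MAX	/* mirrors pcpu_num == UINT_MAX */

struct sa_cand {
	const char *name;
	uint32_t pcpu_num;	/* owning CPU, or PCPU_NONE for the fallback */
};

static const struct sa_cand *pick_sa(const struct sa_cand *cands, int n,
				     uint32_t cpu)
{
	const struct sa_cand *best = NULL;

	for (int i = 0; i < n; i++) {
		const struct sa_cand *x = &cands[i];

		/* bound to some other CPU: never usable here */
		if (x->pcpu_num != PCPU_NONE && x->pcpu_num != cpu)
			continue;

		/* a CPU-local SA always beats the shared fallback */
		if (!best || (best->pcpu_num == PCPU_NONE && x->pcpu_num == cpu))
			best = x;
	}
	return best;
}

int main(void)
{
	const struct sa_cand cands[] = {
		{ "fallback", PCPU_NONE },
		{ "sa-cpu-2", 2 },
	};

	/* CPU 2 gets its own SA; CPU 5 uses the shared fallback */
	printf("cpu 2 -> %s\n", pick_sa(cands, 2, 2)->name);
	printf("cpu 5 -> %s\n", pick_sa(cands, 2, 5)->name);
	return 0;
}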
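
verify_newsa_info() and xfrm_smark_init() above swap open-coded "attr ? nla_get_*(attr) : default" ternaries for the nla_get_u8_default()/nla_get_u32_default() accessors. Their shape reduces to a null-check wrapper around the plain getter; the model below stubs struct nlattr down to a bare payload, so it illustrates only the calling convention, not the real netlink TLV layout.

/* Sketch: the nla_get_*_default() accessor shape. */
#include <stdint.h>
#include <stdio.h>

struct nlattr {
	uint32_t payload;	/* stand-in for the real TLV layout */
};

static uint32_t nla_get_u32(const struct nlattr *nla)
{
	return nla->payload;
}

/* returns the attribute's value, or defvalue when the attribute is absent */
static uint32_t nla_get_u32_default(const struct nlattr *nla, uint32_t defvalue)
{
	return nla ? nla_get_u32(nla) : defvalue;
}

int main(void)
{
	struct nlattr mask = { .payload = 0x00ff };

	/* mirrors xfrm_smark_init(): an absent mask defaults to all-ones */
	printf("0x%x\n", (unsigned)nla_get_u32_default(&mask, 0xffffffff));
	printf("0x%x\n", (unsigned)nla_get_u32_default(NULL, 0xffffffff));
	return 0;
}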