diff options
Diffstat (limited to 'net')
251 files changed, 8312 insertions, 3331 deletions
diff --git a/net/802/hippi.c b/net/802/hippi.c index f80b33a8f7e0..887e73d520e4 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -121,7 +121,7 @@ int hippi_mac_addr(struct net_device *dev, void *p) struct sockaddr *addr = p; if (netif_running(dev)) return -EBUSY; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + dev_addr_set(dev, addr->sa_data); return 0; } EXPORT_SYMBOL(hippi_mac_addr); diff --git a/net/802/p8022.c b/net/802/p8022.c index a6585627051d..79c23173116c 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -23,7 +23,7 @@ #include <net/p8022.h> static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, - unsigned char *dest) + const unsigned char *dest) { llc_build_and_send_ui_pkt(dl->sap, skb, dest, dl->sap->laddr.lsap); return 0; diff --git a/net/802/psnap.c b/net/802/psnap.c index 4492e8d7ad20..1406bfdbda13 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -79,7 +79,7 @@ drop: * Put a SNAP header on a frame and pass to 802.2 */ static int snap_request(struct datalink_proto *dl, - struct sk_buff *skb, u8 *dest) + struct sk_buff *skb, const u8 *dest) { memcpy(skb_push(skb, 5), dl->type, 5); llc_build_and_send_ui_pkt(snap_sap, skb, dest, snap_sap->laddr.lsap); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 0c21d1fec852..90330b893134 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -250,7 +250,7 @@ bool vlan_dev_inherit_address(struct net_device *dev, if (dev->addr_assign_type != NET_ADDR_STOLEN) return false; - ether_addr_copy(dev->dev_addr, real_dev->dev_addr); + eth_hw_addr_set(dev, real_dev->dev_addr); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return true; } @@ -349,7 +349,7 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) dev_uc_del(real_dev, dev->dev_addr); out: - ether_addr_copy(dev->dev_addr, addr->sa_data); + eth_hw_addr_set(dev, addr->sa_data); return 0; } @@ -586,7 +586,7 @@ static int vlan_dev_init(struct net_device *dev) dev->dev_id = real_dev->dev_id; if (is_zero_ether_addr(dev->dev_addr)) { - ether_addr_copy(dev->dev_addr, real_dev->dev_addr); + eth_hw_addr_set(dev, real_dev->dev_addr); dev->addr_assign_type = NET_ADDR_STOLEN; } if (is_zero_ether_addr(dev->broadcast)) diff --git a/net/Kconfig b/net/Kconfig index fb13460c6dab..074472dfa94a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID config NET_RX_BUSY_POLL bool - default y + default y if !PREEMPT_RT config BQL bool diff --git a/net/atm/br2684.c b/net/atm/br2684.c index dd2a8dabed84..11854fde52db 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -578,7 +578,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { unsigned char *esi = atmvcc->dev->esi; if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5]) - memcpy(net_dev->dev_addr, esi, net_dev->addr_len); + dev_addr_set(net_dev, esi); else net_dev->dev_addr[2] = 1; } diff --git a/net/atm/lec.c b/net/atm/lec.c index 7226c784dbe0..8eaea4a4bbd6 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -355,8 +355,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); switch (mesg->type) { case l_set_mac_addr: - for (i = 0; i < 6; i++) - dev->dev_addr[i] = mesg->content.normal.mac_addr[i]; + eth_hw_addr_set(dev, mesg->content.normal.mac_addr); break; case l_del_mac_addr: for (i = 0; i < 6; i++) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2631efc6e359..2f34bbdde0e8 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -202,7 +202,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, * Find an AX.25 control block given both ends. It will only pick up * floating AX.25 control blocks or non Raw socket bound control blocks. */ -ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr, +ax25_cb *ax25_find_cb(const ax25_address *src_addr, ax25_address *dest_addr, ax25_digi *digi, struct net_device *dev) { ax25_cb *s; diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 4ac2e0847652..d0a043a51848 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -35,7 +35,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) spin_lock_bh(&ax25_dev_lock); for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) - if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) { + if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; } spin_unlock_bh(&ax25_dev_lock); diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index b4083f30af0d..979bc4b828a0 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -98,7 +98,7 @@ void ax25_linkfail_release(struct ax25_linkfail *lf) EXPORT_SYMBOL(ax25_linkfail_release); -int ax25_listen_register(ax25_address *callsign, struct net_device *dev) +int ax25_listen_register(const ax25_address *callsign, struct net_device *dev) { struct listen_struct *listen; @@ -121,7 +121,7 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev) EXPORT_SYMBOL(ax25_listen_register); -void ax25_listen_release(ax25_address *callsign, struct net_device *dev) +void ax25_listen_release(const ax25_address *callsign, struct net_device *dev) { struct listen_struct *s, *listen; @@ -171,7 +171,7 @@ int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *) return res; } -int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) +int ax25_listen_mine(const ax25_address *callsign, struct net_device *dev) { struct listen_struct *listen; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index cd6afe895db9..1cac25aca637 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -181,7 +181,7 @@ static int ax25_process_rx_frame(ax25_cb *ax25, struct sk_buff *skb, int type, i } static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, - ax25_address *dev_addr, struct packet_type *ptype) + const ax25_address *dev_addr, struct packet_type *ptype) { ax25_address src, dest, *next_digi = NULL; int type = 0, mine = 0, dama; @@ -447,5 +447,5 @@ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, skb_pull(skb, AX25_KISS_HEADER_LEN); /* Remove the KISS byte */ - return ax25_rcv(skb, dev, (ax25_address *)dev->dev_addr, ptype); + return ax25_rcv(skb, dev, (const ax25_address *)dev->dev_addr, ptype); } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 22f2f66c6e0a..3db76d2470e9 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(ax25_frag_lock); -ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev) +ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *src, ax25_address *dest, ax25_digi *digi, struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *ax25; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 1669744304c5..7242b32fff80 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -254,7 +254,7 @@ batadv_claim_hash_find(struct batadv_priv *bat_priv, * Return: backbone gateway if found or NULL otherwise */ static struct batadv_bla_backbone_gw * -batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, +batadv_backbone_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; @@ -336,7 +336,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ -static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, +static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, unsigned short vid, int claimtype) { struct sk_buff *skb; @@ -488,7 +488,7 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) * Return: the (possibly created) backbone gateway or NULL on error */ static struct batadv_bla_backbone_gw * -batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, +batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; @@ -926,7 +926,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, */ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - u8 *backbone_addr, u8 *claim_addr, + const u8 *backbone_addr, const u8 *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -964,7 +964,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, */ static bool batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - u8 *backbone_addr, u8 *claim_addr, + const u8 *backbone_addr, const u8 *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -2126,7 +2126,7 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_hard_iface *primary_if, struct batadv_bla_claim *claim) { - u8 *primary_addr = primary_if->net_dev->dev_addr; + const u8 *primary_addr = primary_if->net_dev->dev_addr; u16 backbone_crc; bool is_own; void *hdr; @@ -2294,7 +2294,7 @@ batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, struct batadv_hard_iface *primary_if, struct batadv_bla_backbone_gw *backbone_gw) { - u8 *primary_addr = primary_if->net_dev->dev_addr; + const u8 *primary_addr = primary_if->net_dev->dev_addr; u16 backbone_crc; bool is_own; int msecs; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index a3b6658ed789..433901dcf0c3 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -89,7 +89,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) rcu_read_lock(); do { upper = netdev_master_upper_dev_get_rcu(upper); - } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); + } while (upper && !netif_is_bridge_master(upper)); dev_hold(upper); rcu_read_unlock(); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 970d0d7ccc98..83f31494ea4d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -747,7 +747,8 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_hard_iface *primary_if = NULL; bool ret = false; - u8 *orig_addr, orig_ttvn; + const u8 *orig_addr; + u8 orig_ttvn; if (batadv_is_my_client(bat_priv, dst_addr, vid)) { primary_if = batadv_primary_if_get_selected(bat_priv); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0604b0279573..7ee09337fc40 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -134,7 +134,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) return -EADDRNOTAVAIL; ether_addr_copy(old_addr, dev->dev_addr); - ether_addr_copy(dev->dev_addr, addr->sa_data); + eth_hw_addr_set(dev, addr->sa_data); /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 56b9fe97b3b4..fbcb15c7c29b 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -631,9 +631,9 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node = NULL; const struct batadv_icmp_tp_packet *icmp; struct batadv_tp_vars *tp_vars; + const unsigned char *dev_addr; size_t packet_len, mss; u32 rtt, recv_ack, cwnd; - unsigned char *dev_addr; packet_len = BATADV_TP_PLEN; mss = BATADV_TP_PLEN; diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 992773376e51..0cb58eb04093 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -587,8 +587,8 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, * @tvlv_value: tvlv content * @tvlv_value_len: tvlv content length */ -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src, + const u8 *dst, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len) { struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index 54f2a35653d0..4cf8af00fc11 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -42,8 +42,8 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, u8 *src, u8 *dst, void *tvlv_buff, u16 tvlv_buff_len); -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src, + const u8 *dst, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len); #endif /* _NET_BATMAN_ADV_TVLV_H_ */ diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index cc0995301f93..291770fc9551 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -14,7 +14,8 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \ - ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o + ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o hci_codec.o \ + eir.o bluetooth-$(CONFIG_BT_BREDR) += sco.o bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c new file mode 100644 index 000000000000..7e930f77ecab --- /dev/null +++ b/net/bluetooth/eir.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2021 Intel Corporation + */ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include <net/bluetooth/mgmt.h> + +#include "eir.h" + +#define PNP_INFO_SVCLASS_ID 0x1200 + +u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +{ + size_t short_len; + size_t complete_len; + + /* no space left for name (+ NULL + type + len) */ + if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + return ad_len; + + /* use complete name if present and fits */ + complete_len = strlen(hdev->dev_name); + if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len + 1); + + /* use short name if present */ + short_len = strlen(hdev->short_name); + if (short_len) + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->short_name, short_len + 1); + + /* use shortened full name if present, we already know that name + * is longer then HCI_MAX_SHORT_NAME_LENGTH + */ + if (complete_len) { + u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; + + memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); + name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; + + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name, + sizeof(name)); + } + + return ad_len; +} + +u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +{ + return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance); +} + +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = (uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 6) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); + } + + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; +} + +void eir_create(struct hci_dev *hdev, u8 *data) +{ + u8 *ptr = data; + size_t name_len; + + name_len = strlen(hdev->dev_name); + + if (name_len > 0) { + /* EIR Data type */ + if (name_len > 48) { + name_len = 48; + ptr[1] = EIR_NAME_SHORT; + } else { + ptr[1] = EIR_NAME_COMPLETE; + } + + /* EIR Data length */ + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + ptr += (name_len + 2); + } + + if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) { + ptr[0] = 2; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8)hdev->inq_tx_power; + + ptr += 3; + } + + if (hdev->devid_source > 0) { + ptr[0] = 9; + ptr[1] = EIR_DEVICE_ID; + + put_unaligned_le16(hdev->devid_source, ptr + 2); + put_unaligned_le16(hdev->devid_vendor, ptr + 4); + put_unaligned_le16(hdev->devid_product, ptr + 6); + put_unaligned_le16(hdev->devid_version, ptr + 8); + + ptr += 10; + } + + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); +} + +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +{ + struct adv_info *adv = NULL; + u8 ad_len = 0, flags = 0; + u32 instance_flags; + + /* Return 0 when the current instance identifier is invalid. */ + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv) + return 0; + } + + instance_flags = hci_adv_instance_flags(hdev, instance); + + /* If instance already has the flags set skip adding it once + * again. + */ + if (adv && eir_get_data(adv->adv_data, adv->adv_data_len, EIR_FLAGS, + NULL)) + goto skip_flags; + + /* The Add Advertising command allows userspace to set both the general + * and limited discoverable flags. + */ + if (instance_flags & MGMT_ADV_FLAG_DISCOV) + flags |= LE_AD_GENERAL; + + if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) + flags |= LE_AD_LIMITED; + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { + /* If a discovery flag wasn't provided, simply use the global + * settings. + */ + if (!flags) + flags |= mgmt_get_adv_discov_flags(hdev); + + /* If flags would still be empty, then there is no need to + * include the "Flags" AD field". + */ + if (flags) { + ptr[0] = 0x02; + ptr[1] = EIR_FLAGS; + ptr[2] = flags; + + ad_len += 3; + ptr += 3; + } + } + +skip_flags: + if (adv) { + memcpy(ptr, adv->adv_data, adv->adv_data_len); + ad_len += adv->adv_data_len; + ptr += adv->adv_data_len; + } + + if (instance_flags & MGMT_ADV_FLAG_TX_POWER) { + s8 adv_tx_power; + + if (ext_adv_capable(hdev)) { + if (adv) + adv_tx_power = adv->tx_power; + else + adv_tx_power = hdev->adv_tx_power; + } else { + adv_tx_power = hdev->adv_tx_power; + } + + /* Provide Tx Power only if we can provide a valid value for it */ + if (adv_tx_power != HCI_TX_POWER_INVALID) { + ptr[0] = 0x02; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8)adv_tx_power; + + ad_len += 3; + ptr += 3; + } + } + + return ad_len; +} + +static u8 create_default_scan_rsp(struct hci_dev *hdev, u8 *ptr) +{ + u8 scan_rsp_len = 0; + + if (hdev->appearance) + scan_rsp_len = eir_append_appearance(hdev, ptr, scan_rsp_len); + + return eir_append_local_name(hdev, ptr, scan_rsp_len); +} + +u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr) +{ + struct adv_info *adv; + u8 scan_rsp_len = 0; + + if (!instance) + return create_default_scan_rsp(hdev, ptr); + + adv = hci_find_adv_instance(hdev, instance); + if (!adv) + return 0; + + if ((adv->flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) + scan_rsp_len = eir_append_appearance(hdev, ptr, scan_rsp_len); + + memcpy(&ptr[scan_rsp_len], adv->scan_rsp_data, adv->scan_rsp_len); + + scan_rsp_len += adv->scan_rsp_len; + + if (adv->flags & MGMT_ADV_FLAG_LOCAL_NAME) + scan_rsp_len = eir_append_local_name(hdev, ptr, scan_rsp_len); + + return scan_rsp_len; +} diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h new file mode 100644 index 000000000000..724662f8f8b1 --- /dev/null +++ b/net/bluetooth/eir.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2021 Intel Corporation + */ + +void eir_create(struct hci_dev *hdev, u8 *data); + +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); +u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); + +u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len); +u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len); + +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, + u8 *data, u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} + +static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, + size_t *data_len) +{ + size_t parsed = 0; + + if (eir_len < 2) + return NULL; + + while (parsed < eir_len - 1) { + u8 field_len = eir[0]; + + if (field_len == 0) + break; + + parsed += field_len + 1; + + if (parsed > eir_len) + break; + + if (eir[1] != type) { + eir += field_len + 1; + continue; + } + + /* Zero length data */ + if (field_len == 1) + return NULL; + + if (data_len) + *data_len = field_len - 1; + + return &eir[2]; + } + + return NULL; +} diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c new file mode 100644 index 000000000000..f0421d0edaa3 --- /dev/null +++ b/net/bluetooth/hci_codec.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (C) 2021 Intel Corporation */ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> +#include "hci_codec.h" + +static int hci_codec_list_add(struct list_head *list, + struct hci_op_read_local_codec_caps *sent, + struct hci_rp_read_local_codec_caps *rp, + void *caps, + __u32 len) +{ + struct codec_list *entry; + + entry = kzalloc(sizeof(*entry) + len, GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->id = sent->id; + if (sent->id == 0xFF) { + entry->cid = __le16_to_cpu(sent->cid); + entry->vid = __le16_to_cpu(sent->vid); + } + entry->transport = sent->transport; + entry->len = len; + entry->num_caps = rp->num_caps; + if (rp->num_caps) + memcpy(entry->caps, caps, len); + list_add(&entry->list, list); + + return 0; +} + +void hci_codec_list_clear(struct list_head *codec_list) +{ + struct codec_list *c, *n; + + list_for_each_entry_safe(c, n, codec_list, list) { + list_del(&c->list); + kfree(c); + } +} + +static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport, + struct hci_op_read_local_codec_caps + *cmd) +{ + __u8 i; + + for (i = 0; i < TRANSPORT_TYPE_MAX; i++) { + if (transport & BIT(i)) { + struct hci_rp_read_local_codec_caps *rp; + struct hci_codec_caps *caps; + struct sk_buff *skb; + __u8 j; + __u32 len; + + cmd->transport = i; + skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, + sizeof(*cmd), cmd, + HCI_CMD_TIMEOUT); + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Failed to read codec capabilities (%ld)", + PTR_ERR(skb)); + continue; + } + + if (skb->len < sizeof(*rp)) + goto error; + + rp = (void *)skb->data; + + if (rp->status) + goto error; + + if (!rp->num_caps) { + len = 0; + /* this codec doesn't have capabilities */ + goto skip_caps_parse; + } + + skb_pull(skb, sizeof(*rp)); + + for (j = 0, len = 0; j < rp->num_caps; j++) { + caps = (void *)skb->data; + if (skb->len < sizeof(*caps)) + goto error; + if (skb->len < caps->len) + goto error; + len += sizeof(caps->len) + caps->len; + skb_pull(skb, sizeof(caps->len) + caps->len); + } + +skip_caps_parse: + hci_dev_lock(hdev); + hci_codec_list_add(&hdev->local_codecs, cmd, rp, + (__u8 *)rp + sizeof(*rp), len); + hci_dev_unlock(hdev); +error: + kfree_skb(skb); + } + } +} + +void hci_read_supported_codecs(struct hci_dev *hdev) +{ + struct sk_buff *skb; + struct hci_rp_read_local_supported_codecs *rp; + struct hci_std_codecs *std_codecs; + struct hci_vnd_codecs *vnd_codecs; + struct hci_op_read_local_codec_caps caps; + __u8 i; + + skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, + HCI_CMD_TIMEOUT); + + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", + PTR_ERR(skb)); + return; + } + + if (skb->len < sizeof(*rp)) + goto error; + + rp = (void *)skb->data; + + if (rp->status) + goto error; + + skb_pull(skb, sizeof(rp->status)); + + std_codecs = (void *)skb->data; + + /* validate codecs length before accessing */ + if (skb->len < flex_array_size(std_codecs, codec, std_codecs->num) + + sizeof(std_codecs->num)) + goto error; + + /* enumerate codec capabilities of standard codecs */ + memset(&caps, 0, sizeof(caps)); + for (i = 0; i < std_codecs->num; i++) { + caps.id = std_codecs->codec[i]; + caps.direction = 0x00; + hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + } + + skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num) + + sizeof(std_codecs->num)); + + vnd_codecs = (void *)skb->data; + + /* validate vendor codecs length before accessing */ + if (skb->len < + flex_array_size(vnd_codecs, codec, vnd_codecs->num) + + sizeof(vnd_codecs->num)) + goto error; + + /* enumerate vendor codec capabilities */ + for (i = 0; i < vnd_codecs->num; i++) { + caps.id = 0xFF; + caps.cid = vnd_codecs->codec[i].cid; + caps.vid = vnd_codecs->codec[i].vid; + caps.direction = 0x00; + hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + } + +error: + kfree_skb(skb); +} + +void hci_read_supported_codecs_v2(struct hci_dev *hdev) +{ + struct sk_buff *skb; + struct hci_rp_read_local_supported_codecs_v2 *rp; + struct hci_std_codecs_v2 *std_codecs; + struct hci_vnd_codecs_v2 *vnd_codecs; + struct hci_op_read_local_codec_caps caps; + __u8 i; + + skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, + HCI_CMD_TIMEOUT); + + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", + PTR_ERR(skb)); + return; + } + + if (skb->len < sizeof(*rp)) + goto error; + + rp = (void *)skb->data; + + if (rp->status) + goto error; + + skb_pull(skb, sizeof(rp->status)); + + std_codecs = (void *)skb->data; + + /* check for payload data length before accessing */ + if (skb->len < flex_array_size(std_codecs, codec, std_codecs->num) + + sizeof(std_codecs->num)) + goto error; + + memset(&caps, 0, sizeof(caps)); + + for (i = 0; i < std_codecs->num; i++) { + caps.id = std_codecs->codec[i].id; + hci_read_codec_capabilities(hdev, std_codecs->codec[i].transport, + &caps); + } + + skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num) + + sizeof(std_codecs->num)); + + vnd_codecs = (void *)skb->data; + + /* check for payload data length before accessing */ + if (skb->len < + flex_array_size(vnd_codecs, codec, vnd_codecs->num) + + sizeof(vnd_codecs->num)) + goto error; + + for (i = 0; i < vnd_codecs->num; i++) { + caps.id = 0xFF; + caps.cid = vnd_codecs->codec[i].cid; + caps.vid = vnd_codecs->codec[i].vid; + hci_read_codec_capabilities(hdev, vnd_codecs->codec[i].transport, + &caps); + } + +error: + kfree_skb(skb); +} diff --git a/net/bluetooth/hci_codec.h b/net/bluetooth/hci_codec.h new file mode 100644 index 000000000000..a2751930f123 --- /dev/null +++ b/net/bluetooth/hci_codec.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Copyright (C) 2014 Intel Corporation */ + +void hci_read_supported_codecs(struct hci_dev *hdev); +void hci_read_supported_codecs_v2(struct hci_dev *hdev); +void hci_codec_list_clear(struct list_head *codec_list); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2b5059a56cda..bd669c95b9a7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -307,13 +307,133 @@ static bool find_next_esco_param(struct hci_conn *conn, return conn->attempt <= size; } -bool hci_setup_sync(struct hci_conn *conn, __u16 handle) +static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle) +{ + struct hci_dev *hdev = conn->hdev; + struct hci_cp_enhanced_setup_sync_conn cp; + const struct sco_param *param; + + bt_dev_dbg(hdev, "hcon %p", conn); + + /* for offload use case, codec needs to configured before opening SCO */ + if (conn->codec.data_path) + hci_req_configure_datapath(hdev, &conn->codec); + + conn->state = BT_CONNECT; + conn->out = true; + + conn->attempt++; + + memset(&cp, 0x00, sizeof(cp)); + + cp.handle = cpu_to_le16(handle); + + cp.tx_bandwidth = cpu_to_le32(0x00001f40); + cp.rx_bandwidth = cpu_to_le32(0x00001f40); + + switch (conn->codec.id) { + case BT_CODEC_MSBC: + if (!find_next_esco_param(conn, esco_param_msbc, + ARRAY_SIZE(esco_param_msbc))) + return false; + + param = &esco_param_msbc[conn->attempt - 1]; + cp.tx_coding_format.id = 0x05; + cp.rx_coding_format.id = 0x05; + cp.tx_codec_frame_size = __cpu_to_le16(60); + cp.rx_codec_frame_size = __cpu_to_le16(60); + cp.in_bandwidth = __cpu_to_le32(32000); + cp.out_bandwidth = __cpu_to_le32(32000); + cp.in_coding_format.id = 0x04; + cp.out_coding_format.id = 0x04; + cp.in_coded_data_size = __cpu_to_le16(16); + cp.out_coded_data_size = __cpu_to_le16(16); + cp.in_pcm_data_format = 2; + cp.out_pcm_data_format = 2; + cp.in_pcm_sample_payload_msb_pos = 0; + cp.out_pcm_sample_payload_msb_pos = 0; + cp.in_data_path = conn->codec.data_path; + cp.out_data_path = conn->codec.data_path; + cp.in_transport_unit_size = 1; + cp.out_transport_unit_size = 1; + break; + + case BT_CODEC_TRANSPARENT: + if (!find_next_esco_param(conn, esco_param_msbc, + ARRAY_SIZE(esco_param_msbc))) + return false; + param = &esco_param_msbc[conn->attempt - 1]; + cp.tx_coding_format.id = 0x03; + cp.rx_coding_format.id = 0x03; + cp.tx_codec_frame_size = __cpu_to_le16(60); + cp.rx_codec_frame_size = __cpu_to_le16(60); + cp.in_bandwidth = __cpu_to_le32(0x1f40); + cp.out_bandwidth = __cpu_to_le32(0x1f40); + cp.in_coding_format.id = 0x03; + cp.out_coding_format.id = 0x03; + cp.in_coded_data_size = __cpu_to_le16(16); + cp.out_coded_data_size = __cpu_to_le16(16); + cp.in_pcm_data_format = 2; + cp.out_pcm_data_format = 2; + cp.in_pcm_sample_payload_msb_pos = 0; + cp.out_pcm_sample_payload_msb_pos = 0; + cp.in_data_path = conn->codec.data_path; + cp.out_data_path = conn->codec.data_path; + cp.in_transport_unit_size = 1; + cp.out_transport_unit_size = 1; + break; + + case BT_CODEC_CVSD: + if (lmp_esco_capable(conn->link)) { + if (!find_next_esco_param(conn, esco_param_cvsd, + ARRAY_SIZE(esco_param_cvsd))) + return false; + param = &esco_param_cvsd[conn->attempt - 1]; + } else { + if (conn->attempt > ARRAY_SIZE(sco_param_cvsd)) + return false; + param = &sco_param_cvsd[conn->attempt - 1]; + } + cp.tx_coding_format.id = 2; + cp.rx_coding_format.id = 2; + cp.tx_codec_frame_size = __cpu_to_le16(60); + cp.rx_codec_frame_size = __cpu_to_le16(60); + cp.in_bandwidth = __cpu_to_le32(16000); + cp.out_bandwidth = __cpu_to_le32(16000); + cp.in_coding_format.id = 4; + cp.out_coding_format.id = 4; + cp.in_coded_data_size = __cpu_to_le16(16); + cp.out_coded_data_size = __cpu_to_le16(16); + cp.in_pcm_data_format = 2; + cp.out_pcm_data_format = 2; + cp.in_pcm_sample_payload_msb_pos = 0; + cp.out_pcm_sample_payload_msb_pos = 0; + cp.in_data_path = conn->codec.data_path; + cp.out_data_path = conn->codec.data_path; + cp.in_transport_unit_size = 16; + cp.out_transport_unit_size = 16; + break; + default: + return false; + } + + cp.retrans_effort = param->retrans_effort; + cp.pkt_type = __cpu_to_le16(param->pkt_type); + cp.max_latency = __cpu_to_le16(param->max_latency); + + if (hci_send_cmd(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0) + return false; + + return true; +} + +static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle) { struct hci_dev *hdev = conn->hdev; struct hci_cp_setup_sync_conn cp; const struct sco_param *param; - BT_DBG("hcon %p", conn); + bt_dev_dbg(hdev, "hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; @@ -359,6 +479,14 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle) return true; } +bool hci_setup_sync(struct hci_conn *conn, __u16 handle) +{ + if (enhanced_sco_capable(conn->hdev)) + return hci_enhanced_setup_sync_conn(conn, handle); + + return hci_setup_sync_conn(conn, handle); +} + u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier) { @@ -1040,8 +1168,8 @@ static void hci_req_directed_advertising(struct hci_request *req, } struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, - u8 dst_type, u8 sec_level, u16 conn_timeout, - u8 role, bdaddr_t *direct_rpa) + u8 dst_type, bool dst_resolved, u8 sec_level, + u16 conn_timeout, u8 role, bdaddr_t *direct_rpa) { struct hci_conn_params *params; struct hci_conn *conn; @@ -1078,19 +1206,24 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EBUSY); } - /* When given an identity address with existing identity - * resolving key, the connection needs to be established - * to a resolvable random address. - * - * Storing the resolvable random address is required here - * to handle connection failures. The address will later - * be resolved back into the original identity address - * from the connect request. + /* Check if the destination address has been resolved by the controller + * since if it did then the identity address shall be used. */ - irk = hci_find_irk_by_addr(hdev, dst, dst_type); - if (irk && bacmp(&irk->rpa, BDADDR_ANY)) { - dst = &irk->rpa; - dst_type = ADDR_LE_DEV_RANDOM; + if (!dst_resolved) { + /* When given an identity address with existing identity + * resolving key, the connection needs to be established + * to a resolvable random address. + * + * Storing the resolvable random address is required here + * to handle connection failures. The address will later + * be resolved back into the original identity address + * from the connect request. + */ + irk = hci_find_irk_by_addr(hdev, dst, dst_type); + if (irk && bacmp(&irk->rpa, BDADDR_ANY)) { + dst = &irk->rpa; + dst_type = ADDR_LE_DEV_RANDOM; + } } if (conn) { @@ -1319,7 +1452,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, } struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u16 setting) + __u16 setting, struct bt_codec *codec) { struct hci_conn *acl; struct hci_conn *sco; @@ -1344,6 +1477,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, hci_conn_hold(sco); sco->setting = setting; + sco->codec = *codec; if (acl->state == BT_CONNECTED && (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8a47a3017d61..8d33aa64846b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -45,6 +45,7 @@ #include "leds.h" #include "msft.h" #include "aosp.h" +#include "hci_codec.h" static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); @@ -61,130 +62,6 @@ DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); -/* ---- HCI debugfs entries ---- */ - -static ssize_t dut_mode_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - struct sk_buff *skb; - bool enable; - int err; - - if (!test_bit(HCI_UP, &hdev->flags)) - return -ENETDOWN; - - err = kstrtobool_from_user(user_buf, count, &enable); - if (err) - return err; - - if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) - return -EALREADY; - - hci_req_sync_lock(hdev); - if (enable) - skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL, - HCI_CMD_TIMEOUT); - else - skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, - HCI_CMD_TIMEOUT); - hci_req_sync_unlock(hdev); - - if (IS_ERR(skb)) - return PTR_ERR(skb); - - kfree_skb(skb); - - hci_dev_change_flag(hdev, HCI_DUT_MODE); - - return count; -} - -static const struct file_operations dut_mode_fops = { - .open = simple_open, - .read = dut_mode_read, - .write = dut_mode_write, - .llseek = default_llseek, -}; - -static ssize_t vendor_diag_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - bool enable; - int err; - - err = kstrtobool_from_user(user_buf, count, &enable); - if (err) - return err; - - /* When the diagnostic flags are not persistent and the transport - * is not active or in user channel operation, then there is no need - * for the vendor callback. Instead just store the desired value and - * the setting will be programmed when the controller gets powered on. - */ - if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && - (!test_bit(HCI_RUNNING, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) - goto done; - - hci_req_sync_lock(hdev); - err = hdev->set_diag(hdev, enable); - hci_req_sync_unlock(hdev); - - if (err < 0) - return err; - -done: - if (enable) - hci_dev_set_flag(hdev, HCI_VENDOR_DIAG); - else - hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG); - - return count; -} - -static const struct file_operations vendor_diag_fops = { - .open = simple_open, - .read = vendor_diag_read, - .write = vendor_diag_write, - .llseek = default_llseek, -}; - -static void hci_debugfs_create_basic(struct hci_dev *hdev) -{ - debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, - &dut_mode_fops); - - if (hdev->set_diag) - debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev, - &vendor_diag_fops); -} - static int hci_reset_req(struct hci_request *req, unsigned long opt) { BT_DBG("%s %ld", req->hdev->name, opt); @@ -838,10 +715,6 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) if (hdev->commands[22] & 0x04) hci_set_event_mask_page_2(req); - /* Read local codec list if the HCI command is supported */ - if (hdev->commands[29] & 0x20) - hci_req_add(req, HCI_OP_READ_LOCAL_CODECS, 0, NULL); - /* Read local pairing options if the HCI command is supported */ if (hdev->commands[41] & 0x08) hci_req_add(req, HCI_OP_READ_LOCAL_PAIRING_OPTS, 0, NULL); @@ -937,6 +810,12 @@ static int __hci_init(struct hci_dev *hdev) if (err < 0) return err; + /* Read local codec list if the HCI command is supported */ + if (hdev->commands[45] & 0x04) + hci_read_supported_codecs_v2(hdev); + else if (hdev->commands[29] & 0x20) + hci_read_supported_codecs(hdev); + /* This function is only called when the controller is actually in * configured state. When the controller is marked as unconfigured, * this initialization procedure is not run. @@ -1848,6 +1727,7 @@ int hci_dev_do_close(struct hci_dev *hdev) memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); + hci_codec_list_clear(&hdev->local_codecs); hci_req_sync_unlock(hdev); @@ -3081,6 +2961,60 @@ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, } /* This function requires the caller holds hdev->lock */ +u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance) +{ + u32 flags; + struct adv_info *adv; + + if (instance == 0x00) { + /* Instance 0 always manages the "Tx Power" and "Flags" + * fields + */ + flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; + + /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting + * corresponds to the "connectable" instance flag. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) + flags |= MGMT_ADV_FLAG_CONNECTABLE; + + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) + flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; + else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) + flags |= MGMT_ADV_FLAG_DISCOV; + + return flags; + } + + adv = hci_find_adv_instance(hdev, instance); + + /* Return 0 when we got an invalid instance identifier. */ + if (!adv) + return 0; + + return adv->flags; +} + +bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance) +{ + struct adv_info *adv; + + /* Instance 0x00 always set local name */ + if (instance == 0x00) + return true; + + adv = hci_find_adv_instance(hdev, instance); + if (!adv) + return false; + + if (adv->flags & MGMT_ADV_FLAG_APPEARANCE || + adv->flags & MGMT_ADV_FLAG_LOCAL_NAME) + return true; + + return adv->scan_rsp_len ? true : false; +} + +/* This function requires the caller holds hdev->lock */ void hci_adv_monitors_clear(struct hci_dev *hdev) { struct adv_monitor *monitor; @@ -3487,15 +3421,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, { struct hci_conn_params *param; - switch (addr_type) { - case ADDR_LE_DEV_PUBLIC_RESOLVED: - addr_type = ADDR_LE_DEV_PUBLIC; - break; - case ADDR_LE_DEV_RANDOM_RESOLVED: - addr_type = ADDR_LE_DEV_RANDOM; - break; - } - list_for_each_entry(param, list, action) { if (bacmp(¶m->addr, addr) == 0 && param->addr_type == addr_type) @@ -3701,55 +3626,12 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, struct hci_dev *hdev = container_of(nb, struct hci_dev, suspend_notifier); int ret = 0; - u8 state = BT_RUNNING; - /* If powering down, wait for completion. */ - if (mgmt_powering_down(hdev)) { - set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks); - ret = hci_suspend_wait_event(hdev); - if (ret) - goto done; - } - - /* Suspend notifier should only act on events when powered. */ - if (!hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_UNREGISTER)) - goto done; + if (action == PM_SUSPEND_PREPARE) + ret = hci_suspend_dev(hdev); + else if (action == PM_POST_SUSPEND) + ret = hci_resume_dev(hdev); - if (action == PM_SUSPEND_PREPARE) { - /* Suspend consists of two actions: - * - First, disconnect everything and make the controller not - * connectable (disabling scanning) - * - Second, program event filter/accept list and enable scan - */ - ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT); - if (!ret) - state = BT_SUSPEND_DISCONNECT; - - /* Only configure accept list if disconnect succeeded and wake - * isn't being prevented. - */ - if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) { - ret = hci_change_suspend_state(hdev, - BT_SUSPEND_CONFIGURE_WAKE); - if (!ret) - state = BT_SUSPEND_CONFIGURE_WAKE; - } - - hci_clear_wake_reason(hdev); - mgmt_suspending(hdev, state); - - } else if (action == PM_POST_SUSPEND) { - ret = hci_change_suspend_state(hdev, BT_RUNNING); - - mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr, - hdev->wake_addr_type); - } - -done: - /* We always allow suspend even if suspend preparation failed and - * attempt to recover in resume. - */ if (ret) bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d", action, ret); @@ -3857,6 +3739,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) INIT_LIST_HEAD(&hdev->adv_instances); INIT_LIST_HEAD(&hdev->blocked_keys); + INIT_LIST_HEAD(&hdev->local_codecs); INIT_WORK(&hdev->rx_work, hci_rx_work); INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); @@ -3994,6 +3877,7 @@ int hci_register_dev(struct hci_dev *hdev) queue_work(hdev->req_workqueue, &hdev->power_on); idr_init(&hdev->adv_monitors_idr); + msft_register(hdev); return id; @@ -4026,6 +3910,8 @@ void hci_unregister_dev(struct hci_dev *hdev) cancel_work_sync(&hdev->suspend_prepare); } + msft_unregister(hdev); + hci_dev_do_close(hdev); if (!test_bit(HCI_INIT, &hdev->flags) && @@ -4088,16 +3974,78 @@ EXPORT_SYMBOL(hci_release_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { + int ret; + u8 state = BT_RUNNING; + + bt_dev_dbg(hdev, ""); + + /* Suspend should only act on when powered. */ + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return 0; + + /* If powering down, wait for completion. */ + if (mgmt_powering_down(hdev)) { + set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks); + ret = hci_suspend_wait_event(hdev); + if (ret) + goto done; + } + + /* Suspend consists of two actions: + * - First, disconnect everything and make the controller not + * connectable (disabling scanning) + * - Second, program event filter/accept list and enable scan + */ + ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT); + if (ret) + goto clear; + + state = BT_SUSPEND_DISCONNECT; + + /* Only configure accept list if device may wakeup. */ + if (hdev->wakeup && hdev->wakeup(hdev)) { + ret = hci_change_suspend_state(hdev, BT_SUSPEND_CONFIGURE_WAKE); + if (!ret) + state = BT_SUSPEND_CONFIGURE_WAKE; + } + +clear: + hci_clear_wake_reason(hdev); + mgmt_suspending(hdev, state); + +done: + /* We always allow suspend even if suspend preparation failed and + * attempt to recover in resume. + */ hci_sock_dev_event(hdev, HCI_DEV_SUSPEND); - return 0; + return ret; } EXPORT_SYMBOL(hci_suspend_dev); /* Resume HCI device */ int hci_resume_dev(struct hci_dev *hdev) { + int ret; + + bt_dev_dbg(hdev, ""); + + /* Resume should only act on when powered. */ + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return 0; + + /* If powering down don't attempt to resume */ + if (mgmt_powering_down(hdev)) + return 0; + + ret = hci_change_suspend_state(hdev, BT_RUNNING); + + mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr, + hdev->wake_addr_type); + hci_sock_dev_event(hdev, HCI_DEV_RESUME); - return 0; + return ret; } EXPORT_SYMBOL(hci_resume_dev); diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 841393389f7b..902b40a90b91 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -27,6 +27,7 @@ #include <net/bluetooth/hci_core.h> #include "smp.h" +#include "hci_request.h" #include "hci_debugfs.h" #define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \ @@ -1250,3 +1251,125 @@ void hci_debugfs_create_conn(struct hci_conn *conn) snprintf(name, sizeof(name), "%u", conn->handle); conn->debugfs = debugfs_create_dir(name, hdev->debugfs); } + +static ssize_t dut_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + struct sk_buff *skb; + bool enable; + int err; + + if (!test_bit(HCI_UP, &hdev->flags)) + return -ENETDOWN; + + err = kstrtobool_from_user(user_buf, count, &enable); + if (err) + return err; + + if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) + return -EALREADY; + + hci_req_sync_lock(hdev); + if (enable) + skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL, + HCI_CMD_TIMEOUT); + else + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, + HCI_CMD_TIMEOUT); + hci_req_sync_unlock(hdev); + + if (IS_ERR(skb)) + return PTR_ERR(skb); + + kfree_skb(skb); + + hci_dev_change_flag(hdev, HCI_DUT_MODE); + + return count; +} + +static const struct file_operations dut_mode_fops = { + .open = simple_open, + .read = dut_mode_read, + .write = dut_mode_write, + .llseek = default_llseek, +}; + +static ssize_t vendor_diag_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + bool enable; + int err; + + err = kstrtobool_from_user(user_buf, count, &enable); + if (err) + return err; + + /* When the diagnostic flags are not persistent and the transport + * is not active or in user channel operation, then there is no need + * for the vendor callback. Instead just store the desired value and + * the setting will be programmed when the controller gets powered on. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + (!test_bit(HCI_RUNNING, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_USER_CHANNEL))) + goto done; + + hci_req_sync_lock(hdev); + err = hdev->set_diag(hdev, enable); + hci_req_sync_unlock(hdev); + + if (err < 0) + return err; + +done: + if (enable) + hci_dev_set_flag(hdev, HCI_VENDOR_DIAG); + else + hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG); + + return count; +} + +static const struct file_operations vendor_diag_fops = { + .open = simple_open, + .read = vendor_diag_read, + .write = vendor_diag_write, + .llseek = default_llseek, +}; + +void hci_debugfs_create_basic(struct hci_dev *hdev) +{ + debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, + &dut_mode_fops); + + if (hdev->set_diag) + debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev, + &vendor_diag_fops); +} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h index 4444dc8cedc2..9a8a7c93bb12 100644 --- a/net/bluetooth/hci_debugfs.h +++ b/net/bluetooth/hci_debugfs.h @@ -26,6 +26,7 @@ void hci_debugfs_create_common(struct hci_dev *hdev); void hci_debugfs_create_bredr(struct hci_dev *hdev); void hci_debugfs_create_le(struct hci_dev *hdev); void hci_debugfs_create_conn(struct hci_conn *conn); +void hci_debugfs_create_basic(struct hci_dev *hdev); #else @@ -45,4 +46,8 @@ static inline void hci_debugfs_create_conn(struct hci_conn *conn) { } +static inline void hci_debugfs_create_basic(struct hci_dev *hdev) +{ +} + #endif diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0bca035bf2dc..7d0db1ca1248 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -36,6 +36,7 @@ #include "amp.h" #include "smp.h" #include "msft.h" +#include "eir.h" #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" @@ -2278,6 +2279,41 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } +static void hci_cs_enhanced_setup_sync_conn(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_enhanced_setup_sync_conn *cp; + struct hci_conn *acl, *sco; + __u16 handle; + + bt_dev_dbg(hdev, "status 0x%2.2x", status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN); + if (!cp) + return; + + handle = __le16_to_cpu(cp->handle); + + bt_dev_dbg(hdev, "handle 0x%4.4x", handle); + + hci_dev_lock(hdev); + + acl = hci_conn_hash_lookup_handle(hdev, handle); + if (acl) { + sco = acl->link; + if (sco) { + sco->state = BT_CLOSED; + + hci_connect_cfm(sco, status); + hci_conn_del(sco); + } + } + + hci_dev_unlock(hdev); +} + static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status) { struct hci_cp_sniff_mode *cp; @@ -2351,7 +2387,7 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); - if (conn->type == LE_LINK) { + if (conn->type == LE_LINK && conn->role == HCI_ROLE_SLAVE) { hdev->cur_adv_instance = conn->adv_instance; hci_req_reenable_advertising(hdev); } @@ -2367,6 +2403,28 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) hci_dev_unlock(hdev); } +static u8 ev_bdaddr_type(struct hci_dev *hdev, u8 type, bool *resolved) +{ + /* When using controller based address resolution, then the new + * address types 0x02 and 0x03 are used. These types need to be + * converted back into either public address or random address type + */ + switch (type) { + case ADDR_LE_DEV_PUBLIC_RESOLVED: + if (resolved) + *resolved = true; + return ADDR_LE_DEV_PUBLIC; + case ADDR_LE_DEV_RANDOM_RESOLVED: + if (resolved) + *resolved = true; + return ADDR_LE_DEV_RANDOM; + } + + if (resolved) + *resolved = false; + return type; +} + static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr, u8 peer_addr_type, u8 own_address_type, u8 filter_policy) @@ -2378,21 +2436,7 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr, if (!conn) return; - /* When using controller based address resolution, then the new - * address types 0x02 and 0x03 are used. These types need to be - * converted back into either public address or random address type - */ - if (use_ll_privacy(hdev) && - hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { - switch (own_address_type) { - case ADDR_LE_DEV_PUBLIC_RESOLVED: - own_address_type = ADDR_LE_DEV_PUBLIC; - break; - case ADDR_LE_DEV_RANDOM_RESOLVED: - own_address_type = ADDR_LE_DEV_RANDOM; - break; - } - } + own_address_type = ev_bdaddr_type(hdev, own_address_type, NULL); /* Store the initiator and responder address information which * is needed for SMP. These values will not change during the @@ -2961,7 +3005,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) * or until a connection is created or until the Advertising * is timed out due to Directed Advertising." */ - if (conn->type == LE_LINK) { + if (conn->type == LE_LINK && conn->role == HCI_ROLE_SLAVE) { hdev->cur_adv_instance = conn->adv_instance; hci_req_reenable_advertising(hdev); } @@ -3756,6 +3800,10 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cs_setup_sync_conn(hdev, ev->status); break; + case HCI_OP_ENHANCED_SETUP_SYNC_CONN: + hci_cs_enhanced_setup_sync_conn(hdev, ev->status); + break; + case HCI_OP_SNIFF_MODE: hci_cs_sniff_mode(hdev, ev->status); break; @@ -4397,6 +4445,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, { struct hci_ev_sync_conn_complete *ev = (void *) skb->data; struct hci_conn *conn; + unsigned int notify_evt; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); @@ -4471,15 +4520,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, switch (ev->air_mode) { case 0x02: - if (hdev->notify) - hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_CVSD); + notify_evt = HCI_NOTIFY_ENABLE_SCO_CVSD; break; case 0x03: - if (hdev->notify) - hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_TRANSP); + notify_evt = HCI_NOTIFY_ENABLE_SCO_TRANSP; break; } + /* Notify only in case of SCO over HCI transport data path which + * is zero and non-zero value shall be non-HCI transport data path + */ + if (conn->codec.data_path == 0) { + if (hdev->notify) + hdev->notify(hdev, notify_evt); + } + hci_connect_cfm(conn, ev->status); if (ev->status) hci_conn_del(conn); @@ -5282,22 +5337,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->dst_type = irk->addr_type; } - /* When using controller based address resolution, then the new - * address types 0x02 and 0x03 are used. These types need to be - * converted back into either public address or random address type - */ - if (use_ll_privacy(hdev) && - hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && - hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { - switch (conn->dst_type) { - case ADDR_LE_DEV_PUBLIC_RESOLVED: - conn->dst_type = ADDR_LE_DEV_PUBLIC; - break; - case ADDR_LE_DEV_RANDOM_RESOLVED: - conn->dst_type = ADDR_LE_DEV_RANDOM; - break; - } - } + conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL); if (status) { hci_le_conn_failed(conn, status); @@ -5479,8 +5519,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, /* This function requires the caller holds hdev->lock */ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, - u8 addr_type, u8 adv_type, - bdaddr_t *direct_rpa) + u8 addr_type, bool addr_resolved, + u8 adv_type, bdaddr_t *direct_rpa) { struct hci_conn *conn; struct hci_conn_params *params; @@ -5532,9 +5572,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, } } - conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW, - hdev->def_le_autoconnect_timeout, HCI_ROLE_MASTER, - direct_rpa); + conn = hci_connect_le(hdev, addr, addr_type, addr_resolved, + BT_SECURITY_LOW, hdev->def_le_autoconnect_timeout, + HCI_ROLE_MASTER, direct_rpa); if (!IS_ERR(conn)) { /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned * by higher layer that tried to connect, if no then @@ -5575,7 +5615,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; struct hci_conn *conn; - bool match; + bool match, bdaddr_resolved; u32 flags; u8 *ptr; @@ -5619,6 +5659,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * controller address. */ if (direct_addr) { + direct_addr_type = ev_bdaddr_type(hdev, direct_addr_type, + &bdaddr_resolved); + /* Only resolvable random addresses are valid for these * kind of reports and others can be ignored. */ @@ -5646,13 +5689,15 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, bdaddr_type = irk->addr_type; } + bdaddr_type = ev_bdaddr_type(hdev, bdaddr_type, &bdaddr_resolved); + /* Check if we have been requested to connect to this device. * * direct_addr is set only for directed advertising reports (it is NULL * for advertising reports) and is already verified to be RPA above. */ - conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type, - direct_addr); + conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, + type, direct_addr); if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) { /* Store report for later inclusion by * mgmt_device_connected diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index f15626607b2d..92611bfc0b9e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -30,6 +30,7 @@ #include "smp.h" #include "hci_request.h" #include "msft.h" +#include "eir.h" #define HCI_REQ_DONE 0 #define HCI_REQ_PEND 1 @@ -521,164 +522,6 @@ void __hci_req_update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } -#define PNP_INFO_SVCLASS_ID 0x1200 - -static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 4) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - u16 uuid16; - - if (uuid->size != 16) - continue; - - uuid16 = get_unaligned_le16(&uuid->uuid[12]); - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID16_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + sizeof(u16) > len) { - uuids_start[1] = EIR_UUID16_SOME; - break; - } - - *ptr++ = (uuid16 & 0x00ff); - *ptr++ = (uuid16 & 0xff00) >> 8; - uuids_start[0] += sizeof(uuid16); - } - - return ptr; -} - -static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 6) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - if (uuid->size != 32) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID32_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + sizeof(u32) > len) { - uuids_start[1] = EIR_UUID32_SOME; - break; - } - - memcpy(ptr, &uuid->uuid[12], sizeof(u32)); - ptr += sizeof(u32); - uuids_start[0] += sizeof(u32); - } - - return ptr; -} - -static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 18) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - if (uuid->size != 128) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID128_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + 16 > len) { - uuids_start[1] = EIR_UUID128_SOME; - break; - } - - memcpy(ptr, uuid->uuid, 16); - ptr += 16; - uuids_start[0] += 16; - } - - return ptr; -} - -static void create_eir(struct hci_dev *hdev, u8 *data) -{ - u8 *ptr = data; - size_t name_len; - - name_len = strlen(hdev->dev_name); - - if (name_len > 0) { - /* EIR Data type */ - if (name_len > 48) { - name_len = 48; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; - - /* EIR Data length */ - ptr[0] = name_len + 1; - - memcpy(ptr + 2, hdev->dev_name, name_len); - - ptr += (name_len + 2); - } - - if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) { - ptr[0] = 2; - ptr[1] = EIR_TX_POWER; - ptr[2] = (u8) hdev->inq_tx_power; - - ptr += 3; - } - - if (hdev->devid_source > 0) { - ptr[0] = 9; - ptr[1] = EIR_DEVICE_ID; - - put_unaligned_le16(hdev->devid_source, ptr + 2); - put_unaligned_le16(hdev->devid_vendor, ptr + 4); - put_unaligned_le16(hdev->devid_product, ptr + 6); - put_unaligned_le16(hdev->devid_version, ptr + 8); - - ptr += 10; - } - - ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); - ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); - ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); -} - void __hci_req_update_eir(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -698,7 +541,7 @@ void __hci_req_update_eir(struct hci_request *req) memset(&cp, 0, sizeof(cp)); - create_eir(hdev, cp.data); + eir_create(hdev, cp.data); if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) return; @@ -1134,25 +977,6 @@ void hci_req_add_le_passive_scan(struct hci_request *req) addr_resolv); } -static bool adv_instance_is_scannable(struct hci_dev *hdev, u8 instance) -{ - struct adv_info *adv_instance; - - /* Instance 0x00 always set local name */ - if (instance == 0x00) - return true; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return false; - - if (adv_instance->flags & MGMT_ADV_FLAG_APPEARANCE || - adv_instance->flags & MGMT_ADV_FLAG_LOCAL_NAME) - return true; - - return adv_instance->scan_rsp_len ? true : false; -} - static void hci_req_clear_event_filter(struct hci_request *req) { struct hci_cp_set_event_filter f; @@ -1281,21 +1105,24 @@ static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) } } -static void hci_req_add_set_adv_filter_enable(struct hci_request *req, - bool enable) +static void hci_req_prepare_adv_monitor_suspend(struct hci_request *req, + bool suspending) { struct hci_dev *hdev = req->hdev; switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_MSFT: - msft_req_add_set_filter_enable(req, enable); + if (suspending) + msft_suspend(hdev); + else + msft_resume(hdev); break; default: return; } /* No need to block when enabling since it's on resume path */ - if (hdev->suspended && !enable) + if (hdev->suspended && suspending) set_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks); } @@ -1362,7 +1189,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) } /* Disable advertisement filters */ - hci_req_add_set_adv_filter_enable(&req, false); + hci_req_prepare_adv_monitor_suspend(&req, true); /* Prevent disconnects from causing scanning to be re-enabled */ hdev->scanning_paused = true; @@ -1404,7 +1231,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Reset passive/background scanning to normal */ __hci_update_background_scan(&req); /* Enable all of the advertisement filters */ - hci_req_add_set_adv_filter_enable(&req, true); + hci_req_prepare_adv_monitor_suspend(&req, false); /* Unpause directed advertising */ hdev->advertising_paused = false; @@ -1442,7 +1269,7 @@ done: static bool adv_cur_instance_is_scannable(struct hci_dev *hdev) { - return adv_instance_is_scannable(hdev, hdev->cur_adv_instance); + return hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance); } void __hci_req_disable_advertising(struct hci_request *req) @@ -1457,40 +1284,6 @@ void __hci_req_disable_advertising(struct hci_request *req) } } -static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) -{ - u32 flags; - struct adv_info *adv_instance; - - if (instance == 0x00) { - /* Instance 0 always manages the "Tx Power" and "Flags" - * fields - */ - flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; - - /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting - * corresponds to the "connectable" instance flag. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) - flags |= MGMT_ADV_FLAG_CONNECTABLE; - - if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) - flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; - else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) - flags |= MGMT_ADV_FLAG_DISCOV; - - return flags; - } - - adv_instance = hci_find_adv_instance(hdev, instance); - - /* Return 0 when we got an invalid instance identifier. */ - if (!adv_instance) - return 0; - - return adv_instance->flags; -} - static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags) { /* If privacy is not enabled don't use RPA */ @@ -1555,15 +1348,15 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) void __hci_req_enable_advertising(struct hci_request *req) { struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance; + struct adv_info *adv; struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; u16 adv_min_interval, adv_max_interval; u32 flags; - flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); - adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); + flags = hci_adv_instance_flags(hdev, hdev->cur_adv_instance); + adv = hci_find_adv_instance(hdev, hdev->cur_adv_instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. @@ -1595,9 +1388,9 @@ void __hci_req_enable_advertising(struct hci_request *req) memset(&cp, 0, sizeof(cp)); - if (adv_instance) { - adv_min_interval = adv_instance->min_interval; - adv_max_interval = adv_instance->max_interval; + if (adv) { + adv_min_interval = adv->min_interval; + adv_max_interval = adv->max_interval; } else { adv_min_interval = hdev->le_adv_min_interval; adv_max_interval = hdev->le_adv_max_interval; @@ -1628,85 +1421,6 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) -{ - size_t short_len; - size_t complete_len; - - /* no space left for name (+ NULL + type + len) */ - if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) - return ad_len; - - /* use complete name if present and fits */ - complete_len = strlen(hdev->dev_name); - if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) - return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len + 1); - - /* use short name if present */ - short_len = strlen(hdev->short_name); - if (short_len) - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->short_name, short_len + 1); - - /* use shortened full name if present, we already know that name - * is longer then HCI_MAX_SHORT_NAME_LENGTH - */ - if (complete_len) { - u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; - - memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); - name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; - - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name, - sizeof(name)); - } - - return ad_len; -} - -static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len) -{ - return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance); -} - -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) -{ - u8 scan_rsp_len = 0; - - if (hdev->appearance) - scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); - - return append_local_name(hdev, ptr, scan_rsp_len); -} - -static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, - u8 *ptr) -{ - struct adv_info *adv_instance; - u32 instance_flags; - u8 scan_rsp_len = 0; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - instance_flags = adv_instance->flags; - - if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) - scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); - - memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data, - adv_instance->scan_rsp_len); - - scan_rsp_len += adv_instance->scan_rsp_len; - - if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) - scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); - - return scan_rsp_len; -} - void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; @@ -1723,11 +1437,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) memset(&pdu, 0, sizeof(pdu)); - if (instance) - len = create_instance_scan_rsp_data(hdev, instance, - pdu.data); - else - len = create_default_scan_rsp_data(hdev, pdu.data); + len = eir_create_scan_rsp(hdev, instance, pdu.data); if (hdev->scan_rsp_data_len == len && !memcmp(pdu.data, hdev->scan_rsp_data, len)) @@ -1748,11 +1458,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - if (instance) - len = create_instance_scan_rsp_data(hdev, instance, - cp.data); - else - len = create_default_scan_rsp_data(hdev, cp.data); + len = eir_create_scan_rsp(hdev, instance, cp.data); if (hdev->scan_rsp_data_len == len && !memcmp(cp.data, hdev->scan_rsp_data, len)) @@ -1767,95 +1473,6 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) } } -static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) -{ - struct adv_info *adv_instance = NULL; - u8 ad_len = 0, flags = 0; - u32 instance_flags; - - /* Return 0 when the current instance identifier is invalid. */ - if (instance) { - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - } - - instance_flags = get_adv_instance_flags(hdev, instance); - - /* If instance already has the flags set skip adding it once - * again. - */ - if (adv_instance && eir_get_data(adv_instance->adv_data, - adv_instance->adv_data_len, EIR_FLAGS, - NULL)) - goto skip_flags; - - /* The Add Advertising command allows userspace to set both the general - * and limited discoverable flags. - */ - if (instance_flags & MGMT_ADV_FLAG_DISCOV) - flags |= LE_AD_GENERAL; - - if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) - flags |= LE_AD_LIMITED; - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - flags |= LE_AD_NO_BREDR; - - if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { - /* If a discovery flag wasn't provided, simply use the global - * settings. - */ - if (!flags) - flags |= mgmt_get_adv_discov_flags(hdev); - - /* If flags would still be empty, then there is no need to - * include the "Flags" AD field". - */ - if (flags) { - ptr[0] = 0x02; - ptr[1] = EIR_FLAGS; - ptr[2] = flags; - - ad_len += 3; - ptr += 3; - } - } - -skip_flags: - if (adv_instance) { - memcpy(ptr, adv_instance->adv_data, - adv_instance->adv_data_len); - ad_len += adv_instance->adv_data_len; - ptr += adv_instance->adv_data_len; - } - - if (instance_flags & MGMT_ADV_FLAG_TX_POWER) { - s8 adv_tx_power; - - if (ext_adv_capable(hdev)) { - if (adv_instance) - adv_tx_power = adv_instance->tx_power; - else - adv_tx_power = hdev->adv_tx_power; - } else { - adv_tx_power = hdev->adv_tx_power; - } - - /* Provide Tx Power only if we can provide a valid value for it */ - if (adv_tx_power != HCI_TX_POWER_INVALID) { - ptr[0] = 0x02; - ptr[1] = EIR_TX_POWER; - ptr[2] = (u8)adv_tx_power; - - ad_len += 3; - ptr += 3; - } - } - - return ad_len; -} - void __hci_req_update_adv_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; @@ -1872,7 +1489,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance) memset(&pdu, 0, sizeof(pdu)); - len = create_instance_adv_data(hdev, instance, pdu.data); + len = eir_create_adv_data(hdev, instance, pdu.data); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && @@ -1894,7 +1511,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - len = create_instance_adv_data(hdev, instance, cp.data); + len = eir_create_adv_data(hdev, instance, cp.data); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && @@ -2183,7 +1800,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) adv_instance = NULL; } - flags = get_adv_instance_flags(hdev, instance); + flags = hci_adv_instance_flags(hdev, instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. @@ -2223,7 +1840,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND); - } else if (adv_instance_is_scannable(hdev, instance) || + } else if (hci_adv_instance_is_scannable(hdev, instance) || (flags & MGMT_ADV_PARAM_SCAN_RSP)) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND); @@ -3327,6 +2944,53 @@ bool hci_req_stop_discovery(struct hci_request *req) return ret; } +static void config_data_path_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + bt_dev_dbg(hdev, "status %u", status); +} + +int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec) +{ + struct hci_request req; + int err; + __u8 vnd_len, *vnd_data = NULL; + struct hci_op_configure_data_path *cmd = NULL; + + hci_req_init(&req, hdev); + + err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, + &vnd_data); + if (err < 0) + goto error; + + cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL); + if (!cmd) { + err = -ENOMEM; + goto error; + } + + err = hdev->get_data_path_id(hdev, &cmd->data_path_id); + if (err < 0) + goto error; + + cmd->vnd_len = vnd_len; + memcpy(cmd->vnd_data, vnd_data, vnd_len); + + cmd->direction = 0x00; + hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd); + + cmd->direction = 0x01; + hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd); + + err = hci_req_run(&req, config_data_path_complete); +error: + + kfree(cmd); + kfree(vnd_data); + return err; +} + static int stop_discovery(struct hci_request *req, unsigned long opt) { hci_dev_lock(req->hdev); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 39ee8a18087a..f31420f58525 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -101,6 +101,8 @@ void __hci_req_update_class(struct hci_request *req); /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); +int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec); + static inline void hci_req_update_scan(struct hci_dev *hdev) { queue_work(hdev->req_workqueue, &hdev->scan_update); @@ -122,26 +124,3 @@ static inline void hci_update_background_scan(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); - -u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len); - -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, - u8 *data, u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - -static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) -{ - eir[eir_len++] = sizeof(type) + sizeof(data); - eir[eir_len++] = type; - put_unaligned_le16(data, &eir[eir_len]); - eir_len += sizeof(data); - - return eir_len; -} diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f1128c2134f0..d0dad1fafe07 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -57,6 +57,7 @@ struct hci_pinfo { unsigned long flags; __u32 cookie; char comm[TASK_COMM_LEN]; + __u16 mtu; }; static struct hci_dev *hci_hdev_from_sock(struct sock *sk) @@ -1374,6 +1375,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, break; } + /* Default MTU to HCI_MAX_FRAME_SIZE if not set */ + if (!hci_pi(sk)->mtu) + hci_pi(sk)->mtu = HCI_MAX_FRAME_SIZE; + sk->sk_state = BT_BOUND; done: @@ -1506,9 +1511,8 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, } static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, - struct msghdr *msg, size_t msglen) + struct sk_buff *skb) { - void *buf; u8 *cp; struct mgmt_hdr *hdr; u16 opcode, index, len; @@ -1517,40 +1521,31 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, bool var_len, no_hdev; int err; - BT_DBG("got %zu bytes", msglen); + BT_DBG("got %d bytes", skb->len); - if (msglen < sizeof(*hdr)) + if (skb->len < sizeof(*hdr)) return -EINVAL; - buf = kmalloc(msglen, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (memcpy_from_msg(buf, msg, msglen)) { - err = -EFAULT; - goto done; - } - - hdr = buf; + hdr = (void *)skb->data; opcode = __le16_to_cpu(hdr->opcode); index = __le16_to_cpu(hdr->index); len = __le16_to_cpu(hdr->len); - if (len != msglen - sizeof(*hdr)) { + if (len != skb->len - sizeof(*hdr)) { err = -EINVAL; goto done; } if (chan->channel == HCI_CHANNEL_CONTROL) { - struct sk_buff *skb; + struct sk_buff *cmd; /* Send event to monitor */ - skb = create_monitor_ctrl_command(sk, index, opcode, len, - buf + sizeof(*hdr)); - if (skb) { - hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + cmd = create_monitor_ctrl_command(sk, index, opcode, len, + skb->data + sizeof(*hdr)); + if (cmd) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, cmd, HCI_SOCK_TRUSTED, NULL); - kfree_skb(skb); + kfree_skb(cmd); } } @@ -1615,26 +1610,25 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, if (hdev && chan->hdev_init) chan->hdev_init(sk, hdev); - cp = buf + sizeof(*hdr); + cp = skb->data + sizeof(*hdr); err = handler->func(sk, hdev, cp, len); if (err < 0) goto done; - err = msglen; + err = skb->len; done: if (hdev) hci_dev_put(hdev); - kfree(buf); return err; } -static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) +static int hci_logging_frame(struct sock *sk, struct sk_buff *skb, + unsigned int flags) { struct hci_mon_hdr *hdr; - struct sk_buff *skb; struct hci_dev *hdev; u16 index; int err; @@ -1643,24 +1637,13 @@ static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) * the priority byte, the ident length byte and at least one string * terminator NUL byte. Anything shorter are invalid packets. */ - if (len < sizeof(*hdr) + 3) + if (skb->len < sizeof(*hdr) + 3) return -EINVAL; - skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return err; - - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - err = -EFAULT; - goto drop; - } - hdr = (void *)skb->data; - if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) { - err = -EINVAL; - goto drop; - } + if (__le16_to_cpu(hdr->len) != skb->len - sizeof(*hdr)) + return -EINVAL; if (__le16_to_cpu(hdr->opcode) == 0x0000) { __u8 priority = skb->data[sizeof(*hdr)]; @@ -1679,25 +1662,20 @@ static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) * The message follows the ident string (if present) and * must be NUL terminated. Otherwise it is not a valid packet. */ - if (priority > 7 || skb->data[len - 1] != 0x00 || - ident_len > len - sizeof(*hdr) - 3 || - skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) { - err = -EINVAL; - goto drop; - } + if (priority > 7 || skb->data[skb->len - 1] != 0x00 || + ident_len > skb->len - sizeof(*hdr) - 3 || + skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) + return -EINVAL; } else { - err = -EINVAL; - goto drop; + return -EINVAL; } index = __le16_to_cpu(hdr->index); if (index != MGMT_INDEX_NONE) { hdev = hci_dev_get(index); - if (!hdev) { - err = -ENODEV; - goto drop; - } + if (!hdev) + return -ENODEV; } else { hdev = NULL; } @@ -1705,13 +1683,11 @@ static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); - err = len; + err = skb->len; if (hdev) hci_dev_put(hdev); -drop: - kfree_skb(skb); return err; } @@ -1723,19 +1699,23 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, struct hci_dev *hdev; struct sk_buff *skb; int err; + const unsigned int flags = msg->msg_flags; BT_DBG("sock %p sk %p", sock, sk); - if (msg->msg_flags & MSG_OOB) + if (flags & MSG_OOB) return -EOPNOTSUPP; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE| - MSG_CMSG_COMPAT)) + if (flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL | MSG_ERRQUEUE | MSG_CMSG_COMPAT)) return -EINVAL; - if (len < 4 || len > HCI_MAX_FRAME_SIZE) + if (len < 4 || len > hci_pi(sk)->mtu) return -EINVAL; + skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); + if (IS_ERR(skb)) + return PTR_ERR(skb); + lock_sock(sk); switch (hci_pi(sk)->channel) { @@ -1744,39 +1724,30 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, break; case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; - goto done; + goto drop; case HCI_CHANNEL_LOGGING: - err = hci_logging_frame(sk, msg, len); - goto done; + err = hci_logging_frame(sk, skb, flags); + goto drop; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); if (chan) - err = hci_mgmt_cmd(chan, sk, msg, len); + err = hci_mgmt_cmd(chan, sk, skb); else err = -EINVAL; mutex_unlock(&mgmt_chan_list_lock); - goto done; + goto drop; } hdev = hci_hdev_from_sock(sk); if (IS_ERR(hdev)) { err = PTR_ERR(hdev); - goto done; + goto drop; } if (!test_bit(HCI_UP, &hdev->flags)) { err = -ENETDOWN; - goto done; - } - - skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - goto done; - - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - err = -EFAULT; goto drop; } @@ -1857,8 +1828,8 @@ drop: goto done; } -static int hci_sock_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int len) +static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int len) { struct hci_ufilter uf = { .opcode = 0 }; struct sock *sk = sock->sk; @@ -1866,9 +1837,6 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); - if (level != SOL_HCI) - return -ENOPROTOOPT; - lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { @@ -1943,18 +1911,63 @@ done: return err; } -static int hci_sock_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) +static int hci_sock_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int len) { - struct hci_ufilter uf; struct sock *sk = sock->sk; - int len, opt, err = 0; + int err = 0, opt = 0; BT_DBG("sk %p, opt %d", sk, optname); - if (level != SOL_HCI) + if (level == SOL_HCI) + return hci_sock_setsockopt_old(sock, level, optname, optval, + len); + + if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; + lock_sock(sk); + + switch (optname) { + case BT_SNDMTU: + case BT_RCVMTU: + switch (hci_pi(sk)->channel) { + /* Don't allow changing MTU for channels that are meant for HCI + * traffic only. + */ + case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: + err = -ENOPROTOOPT; + goto done; + } + + if (copy_from_sockptr(&opt, optval, sizeof(u16))) { + err = -EFAULT; + break; + } + + hci_pi(sk)->mtu = opt; + break; + + default: + err = -ENOPROTOOPT; + break; + } + +done: + release_sock(sk); + return err; +} + +static int hci_sock_getsockopt_old(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct hci_ufilter uf; + struct sock *sk = sock->sk; + int len, opt, err = 0; + + BT_DBG("sk %p, opt %d", sk, optname); + if (get_user(len, optlen)) return -EFAULT; @@ -2012,6 +2025,39 @@ done: return err; } +static int hci_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p, opt %d", sk, optname); + + if (level == SOL_HCI) + return hci_sock_getsockopt_old(sock, level, optname, optval, + optlen); + + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case BT_SNDMTU: + case BT_RCVMTU: + if (put_user(hci_pi(sk)->mtu, (u16 __user *)optval)) + err = -EFAULT; + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 77ba68209dbd..4f8f37599962 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7902,7 +7902,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, dst_type = ADDR_LE_DEV_RANDOM; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - hcon = hci_connect_le(hdev, dst, dst_type, + hcon = hci_connect_le(hdev, dst, dst_type, false, chan->sec_level, HCI_LE_CONN_TIMEOUT, HCI_ROLE_SLAVE, NULL); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c99d65ef13b1..160c016a5dfb 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1508,6 +1508,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return; + l2cap_sock_kill(sk); } @@ -1516,6 +1519,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); /* This callback can be called both for server (BT_LISTEN) @@ -1707,8 +1713,10 @@ static void l2cap_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); - if (l2cap_pi(sk)->chan) + if (l2cap_pi(sk)->chan) { + l2cap_pi(sk)->chan->data = NULL; l2cap_chan_put(l2cap_pi(sk)->chan); + } if (l2cap_pi(sk)->rx_busy_skb) { kfree_skb(l2cap_pi(sk)->rx_busy_skb); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cea01e275f1e..3e5283607b97 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,6 +38,7 @@ #include "mgmt_util.h" #include "mgmt_config.h" #include "msft.h" +#include "eir.h" #define MGMT_VERSION 1 #define MGMT_REVISION 21 @@ -3791,6 +3792,18 @@ static const u8 debug_uuid[16] = { }; #endif +/* 330859bc-7506-492d-9370-9a6f0614037f */ +static const u8 quality_report_uuid[16] = { + 0x7f, 0x03, 0x14, 0x06, 0x6f, 0x9a, 0x70, 0x93, + 0x2d, 0x49, 0x06, 0x75, 0xbc, 0x59, 0x08, 0x33, +}; + +/* a6695ace-ee7f-4fb9-881a-5fac66c629af */ +static const u8 offload_codecs_uuid[16] = { + 0xaf, 0x29, 0xc6, 0x66, 0xac, 0x5f, 0x1a, 0x88, + 0xb9, 0x4f, 0x7f, 0xee, 0xce, 0x5a, 0x69, 0xa6, +}; + /* 671b10b5-42c0-4696-9227-eb28d1b049d6 */ static const u8 simult_central_periph_uuid[16] = { 0xd6, 0x49, 0xb0, 0xd1, 0x28, 0xeb, 0x27, 0x92, @@ -3806,7 +3819,7 @@ static const u8 rpa_resolution_uuid[16] = { static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - char buf[62]; /* Enough space for 3 features */ + char buf[102]; /* Enough space for 5 features: 2 + 20 * 5 */ struct mgmt_rp_read_exp_features_info *rp = (void *)buf; u16 idx = 0; u32 flags; @@ -3850,6 +3863,28 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, idx++; } + if (hdev && hdev->set_quality_report) { + if (hci_dev_test_flag(hdev, HCI_QUALITY_REPORT)) + flags = BIT(0); + else + flags = 0; + + memcpy(rp->features[idx].uuid, quality_report_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } + + if (hdev && hdev->get_data_path_id) { + if (hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) + flags = BIT(0); + else + flags = 0; + + memcpy(rp->features[idx].uuid, offload_codecs_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } + rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable @@ -3892,150 +3927,341 @@ static int exp_debug_feature_changed(bool enabled, struct sock *skip) } #endif -static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, - void *data, u16 data_len) +static int exp_quality_report_feature_changed(bool enabled, struct sock *skip) { - struct mgmt_cp_set_exp_feature *cp = data; - struct mgmt_rp_set_exp_feature rp; + struct mgmt_ev_exp_feature_changed ev; - bt_dev_dbg(hdev, "sock %p", sk); + memset(&ev, 0, sizeof(ev)); + memcpy(ev.uuid, quality_report_uuid, 16); + ev.flags = cpu_to_le32(enabled ? BIT(0) : 0); + + return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, NULL, + &ev, sizeof(ev), + HCI_MGMT_EXP_FEATURE_EVENTS, skip); +} + +#define EXP_FEAT(_uuid, _set_func) \ +{ \ + .uuid = _uuid, \ + .set_func = _set_func, \ +} + +/* The zero key uuid is special. Multiple exp features are set through it. */ +static int set_zero_key_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, u16 data_len) +{ + struct mgmt_rp_set_exp_feature rp; - if (!memcmp(cp->uuid, ZERO_KEY, 16)) { - memset(rp.uuid, 0, 16); - rp.flags = cpu_to_le32(0); + memset(rp.uuid, 0, 16); + rp.flags = cpu_to_le32(0); #ifdef CONFIG_BT_FEATURE_DEBUG - if (!hdev) { - bool changed = bt_dbg_get(); + if (!hdev) { + bool changed = bt_dbg_get(); - bt_dbg_set(false); + bt_dbg_set(false); - if (changed) - exp_debug_feature_changed(false, sk); - } + if (changed) + exp_debug_feature_changed(false, sk); + } #endif - if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) { - bool changed = hci_dev_test_flag(hdev, - HCI_ENABLE_LL_PRIVACY); + if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) { + bool changed = hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY); - hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); + hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); - if (changed) - exp_ll_privacy_feature_changed(false, hdev, sk); - } + if (changed) + exp_ll_privacy_feature_changed(false, hdev, sk); + } - hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); - return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, 0, - &rp, sizeof(rp)); - } + return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); +} #ifdef CONFIG_BT_FEATURE_DEBUG - if (!memcmp(cp->uuid, debug_uuid, 16)) { - bool val, changed; - int err; +static int set_debug_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, u16 data_len) +{ + struct mgmt_rp_set_exp_feature rp; - /* Command requires to use the non-controller index */ - if (hdev) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_INDEX); + bool val, changed; + int err; - /* Parameters are limited to a single octet */ - if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) - return mgmt_cmd_status(sk, MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); + /* Command requires to use the non-controller index */ + if (hdev) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); - /* Only boolean on/off is supported */ - if (cp->param[0] != 0x00 && cp->param[0] != 0x01) - return mgmt_cmd_status(sk, MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); - val = !!cp->param[0]; - changed = val ? !bt_dbg_get() : bt_dbg_get(); - bt_dbg_set(val); + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); - memcpy(rp.uuid, debug_uuid, 16); - rp.flags = cpu_to_le32(val ? BIT(0) : 0); + val = !!cp->param[0]; + changed = val ? !bt_dbg_get() : bt_dbg_get(); + bt_dbg_set(val); - hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + memcpy(rp.uuid, debug_uuid, 16); + rp.flags = cpu_to_le32(val ? BIT(0) : 0); - err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, 0, - &rp, sizeof(rp)); + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); - if (changed) - exp_debug_feature_changed(val, sk); + err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); - return err; - } + if (changed) + exp_debug_feature_changed(val, sk); + + return err; +} #endif - if (!memcmp(cp->uuid, rpa_resolution_uuid, 16)) { - bool val, changed; - int err; - u32 flags; +static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, + u16 data_len) +{ + struct mgmt_rp_set_exp_feature rp; + bool val, changed; + int err; + u32 flags; + + /* Command requires to use the controller index */ + if (!hdev) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); - /* Command requires to use the controller index */ - if (!hdev) - return mgmt_cmd_status(sk, MGMT_INDEX_NONE, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_INDEX); + /* Changes can only be made when controller is powered down */ + if (hdev_is_powered(hdev)) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_REJECTED); - /* Changes can only be made when controller is powered down */ - if (hdev_is_powered(hdev)) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_REJECTED); + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); - /* Parameters are limited to a single octet */ - if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); - /* Only boolean on/off is supported */ - if (cp->param[0] != 0x00 && cp->param[0] != 0x01) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_INVALID_PARAMS); + val = !!cp->param[0]; - val = !!cp->param[0]; + if (val) { + changed = !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY); + hci_dev_set_flag(hdev, HCI_ENABLE_LL_PRIVACY); + hci_dev_clear_flag(hdev, HCI_ADVERTISING); - if (val) { - changed = !hci_dev_test_flag(hdev, - HCI_ENABLE_LL_PRIVACY); - hci_dev_set_flag(hdev, HCI_ENABLE_LL_PRIVACY); - hci_dev_clear_flag(hdev, HCI_ADVERTISING); + /* Enable LL privacy + supported settings changed */ + flags = BIT(0) | BIT(1); + } else { + changed = hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY); + hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); - /* Enable LL privacy + supported settings changed */ - flags = BIT(0) | BIT(1); - } else { - changed = hci_dev_test_flag(hdev, - HCI_ENABLE_LL_PRIVACY); - hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); + /* Disable LL privacy + supported settings changed */ + flags = BIT(1); + } - /* Disable LL privacy + supported settings changed */ - flags = BIT(1); + memcpy(rp.uuid, rpa_resolution_uuid, 16); + rp.flags = cpu_to_le32(flags); + + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + + if (changed) + exp_ll_privacy_feature_changed(val, hdev, sk); + + return err; +} + +static int set_quality_report_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, + u16 data_len) +{ + struct mgmt_rp_set_exp_feature rp; + bool val, changed; + int err; + + /* Command requires to use a valid controller index */ + if (!hdev) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); + + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + hci_req_sync_lock(hdev); + + val = !!cp->param[0]; + changed = (val != hci_dev_test_flag(hdev, HCI_QUALITY_REPORT)); + + if (!hdev->set_quality_report) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_NOT_SUPPORTED); + goto unlock_quality_report; + } + + if (changed) { + err = hdev->set_quality_report(hdev, val); + if (err) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_FAILED); + goto unlock_quality_report; } + if (val) + hci_dev_set_flag(hdev, HCI_QUALITY_REPORT); + else + hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); + } - memcpy(rp.uuid, rpa_resolution_uuid, 16); - rp.flags = cpu_to_le32(flags); + bt_dev_dbg(hdev, "quality report enable %d changed %d", val, changed); - hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + memcpy(rp.uuid, quality_report_uuid, 16); + rp.flags = cpu_to_le32(val ? BIT(0) : 0); + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); - err = mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, 0, - &rp, sizeof(rp)); + if (changed) + exp_quality_report_feature_changed(val, sk); - if (changed) - exp_ll_privacy_feature_changed(val, hdev, sk); +unlock_quality_report: + hci_req_sync_unlock(hdev); + return err; +} - return err; +static int exp_offload_codec_feature_changed(bool enabled, struct sock *skip) +{ + struct mgmt_ev_exp_feature_changed ev; + + memset(&ev, 0, sizeof(ev)); + memcpy(ev.uuid, offload_codecs_uuid, 16); + ev.flags = cpu_to_le32(enabled ? BIT(0) : 0); + + return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, NULL, + &ev, sizeof(ev), + HCI_MGMT_EXP_FEATURE_EVENTS, skip); +} + +static int set_offload_codec_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, + u16 data_len) +{ + bool val, changed; + int err; + struct mgmt_rp_set_exp_feature rp; + + /* Command requires to use a valid controller index */ + if (!hdev) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); + + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + val = !!cp->param[0]; + changed = (val != hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)); + + if (!hdev->get_data_path_id) { + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_NOT_SUPPORTED); + } + + if (changed) { + if (val) + hci_dev_set_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED); + else + hci_dev_clear_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED); + } + + bt_dev_info(hdev, "offload codecs enable %d changed %d", + val, changed); + + memcpy(rp.uuid, offload_codecs_uuid, 16); + rp.flags = cpu_to_le32(val ? BIT(0) : 0); + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + + if (changed) + exp_offload_codec_feature_changed(val, sk); + + return err; +} + +static const struct mgmt_exp_feature { + const u8 *uuid; + int (*set_func)(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, u16 data_len); +} exp_features[] = { + EXP_FEAT(ZERO_KEY, set_zero_key_func), +#ifdef CONFIG_BT_FEATURE_DEBUG + EXP_FEAT(debug_uuid, set_debug_func), +#endif + EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func), + EXP_FEAT(quality_report_uuid, set_quality_report_func), + EXP_FEAT(offload_codecs_uuid, set_offload_codec_func), + + /* end with a null feature */ + EXP_FEAT(NULL, NULL) +}; + +static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_set_exp_feature *cp = data; + size_t i = 0; + + bt_dev_dbg(hdev, "sock %p", sk); + + for (i = 0; exp_features[i].uuid; i++) { + if (!memcmp(cp->uuid, exp_features[i].uuid, 16)) + return exp_features[i].set_func(sk, hdev, cp, data_len); } return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE, @@ -7315,6 +7541,11 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, if (!rp) return -ENOMEM; + if (!status && !lmp_ssp_capable(hdev)) { + status = MGMT_STATUS_NOT_SUPPORTED; + eir_len = 0; + } + if (status) goto complete; @@ -7526,7 +7757,7 @@ static u8 calculate_name_len(struct hci_dev *hdev) { u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; - return append_local_name(hdev, buf, 0); + return eir_append_local_name(hdev, buf, 0); } static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, @@ -8222,7 +8453,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, * advertising. */ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_NOT_SUPPORTED); hci_dev_lock(hdev); diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index b4bfae41e8a5..255cffa554ee 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -94,11 +94,14 @@ struct msft_data { __u16 pending_add_handle; __u16 pending_remove_handle; __u8 reregistering; + __u8 suspending; __u8 filter_enabled; }; static int __msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor); +static int __msft_remove_monitor(struct hci_dev *hdev, + struct adv_monitor *monitor, u16 handle); bool msft_monitor_supported(struct hci_dev *hdev) { @@ -154,7 +157,7 @@ failed: } /* This function requires the caller holds hdev->lock */ -static void reregister_monitor_on_restart(struct hci_dev *hdev, int handle) +static void reregister_monitor(struct hci_dev *hdev, int handle) { struct adv_monitor *monitor; struct msft_data *msft = hdev->msft_data; @@ -182,31 +185,102 @@ static void reregister_monitor_on_restart(struct hci_dev *hdev, int handle) } } +/* This function requires the caller holds hdev->lock */ +static void remove_monitor_on_suspend(struct hci_dev *hdev, int handle) +{ + struct adv_monitor *monitor; + struct msft_data *msft = hdev->msft_data; + int err; + + while (1) { + monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); + if (!monitor) { + /* All monitors have been removed */ + msft->suspending = false; + hci_update_background_scan(hdev); + return; + } + + msft->pending_remove_handle = (u16)handle; + err = __msft_remove_monitor(hdev, monitor, handle); + + /* If success, return and wait for monitor removed callback */ + if (!err) + return; + + /* Otherwise free the monitor and keep removing */ + hci_free_adv_monitor(hdev, monitor); + handle++; + } +} + +/* This function requires the caller holds hdev->lock */ +void msft_suspend(struct hci_dev *hdev) +{ + struct msft_data *msft = hdev->msft_data; + + if (!msft) + return; + + if (msft_monitor_supported(hdev)) { + msft->suspending = true; + /* Quitely remove all monitors on suspend to avoid waking up + * the system. + */ + remove_monitor_on_suspend(hdev, 0); + } +} + +/* This function requires the caller holds hdev->lock */ +void msft_resume(struct hci_dev *hdev) +{ + struct msft_data *msft = hdev->msft_data; + + if (!msft) + return; + + if (msft_monitor_supported(hdev)) { + msft->reregistering = true; + /* Monitors are removed on suspend, so we need to add all + * monitors on resume. + */ + reregister_monitor(hdev, 0); + } +} + void msft_do_open(struct hci_dev *hdev) { - struct msft_data *msft; + struct msft_data *msft = hdev->msft_data; if (hdev->msft_opcode == HCI_OP_NOP) return; + if (!msft) { + bt_dev_err(hdev, "MSFT extension not registered"); + return; + } + bt_dev_dbg(hdev, "Initialize MSFT extension"); - msft = kzalloc(sizeof(*msft), GFP_KERNEL); - if (!msft) - return; + /* Reset existing MSFT data before re-reading */ + kfree(msft->evt_prefix); + msft->evt_prefix = NULL; + msft->evt_prefix_len = 0; + msft->features = 0; if (!read_supported_features(hdev, msft)) { + hdev->msft_data = NULL; kfree(msft); return; } - INIT_LIST_HEAD(&msft->handle_map); - hdev->msft_data = msft; - if (msft_monitor_supported(hdev)) { msft->reregistering = true; msft_set_filter_enable(hdev, true); - reregister_monitor_on_restart(hdev, 0); + /* Monitors get removed on power off, so we need to explicitly + * tell the controller to re-monitor. + */ + reregister_monitor(hdev, 0); } } @@ -221,8 +295,9 @@ void msft_do_close(struct hci_dev *hdev) bt_dev_dbg(hdev, "Cleanup of MSFT extension"); - hdev->msft_data = NULL; - + /* The controller will silently remove all monitors on power off. + * Therefore, remove handle_data mapping and reset monitor state. + */ list_for_each_entry_safe(handle_data, tmp, &msft->handle_map, list) { monitor = idr_find(&hdev->adv_monitors_idr, handle_data->mgmt_handle); @@ -233,6 +308,34 @@ void msft_do_close(struct hci_dev *hdev) list_del(&handle_data->list); kfree(handle_data); } +} + +void msft_register(struct hci_dev *hdev) +{ + struct msft_data *msft = NULL; + + bt_dev_dbg(hdev, "Register MSFT extension"); + + msft = kzalloc(sizeof(*msft), GFP_KERNEL); + if (!msft) { + bt_dev_err(hdev, "Failed to register MSFT extension"); + return; + } + + INIT_LIST_HEAD(&msft->handle_map); + hdev->msft_data = msft; +} + +void msft_unregister(struct hci_dev *hdev) +{ + struct msft_data *msft = hdev->msft_data; + + if (!msft) + return; + + bt_dev_dbg(hdev, "Unregister MSFT extension"); + + hdev->msft_data = NULL; kfree(msft->evt_prefix); kfree(msft); @@ -345,8 +448,7 @@ unlock: /* If in restart/reregister sequence, keep registering. */ if (msft->reregistering) - reregister_monitor_on_restart(hdev, - msft->pending_add_handle + 1); + reregister_monitor(hdev, msft->pending_add_handle + 1); hci_dev_unlock(hdev); @@ -383,13 +485,25 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, if (handle_data) { monitor = idr_find(&hdev->adv_monitors_idr, handle_data->mgmt_handle); - if (monitor) + + if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED) + monitor->state = ADV_MONITOR_STATE_REGISTERED; + + /* Do not free the monitor if it is being removed due to + * suspend. It will be re-monitored on resume. + */ + if (monitor && !msft->suspending) hci_free_adv_monitor(hdev, monitor); list_del(&handle_data->list); kfree(handle_data); } + /* If in suspend/remove sequence, keep removing. */ + if (msft->suspending) + remove_monitor_on_suspend(hdev, + msft->pending_remove_handle + 1); + /* If remove all monitors is required, we need to continue the process * here because the earlier it was paused when waiting for the * response from controller. @@ -408,7 +522,8 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, hci_dev_unlock(hdev); done: - hci_remove_adv_monitor_complete(hdev, status); + if (!msft->suspending) + hci_remove_adv_monitor_complete(hdev, status); } static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev, @@ -541,15 +656,15 @@ int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor) if (!msft) return -EOPNOTSUPP; - if (msft->reregistering) + if (msft->reregistering || msft->suspending) return -EBUSY; return __msft_add_monitor_pattern(hdev, monitor); } /* This function requires the caller holds hdev->lock */ -int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, - u16 handle) +static int __msft_remove_monitor(struct hci_dev *hdev, + struct adv_monitor *monitor, u16 handle) { struct msft_cp_le_cancel_monitor_advertisement cp; struct msft_monitor_advertisement_handle_data *handle_data; @@ -557,12 +672,6 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, struct msft_data *msft = hdev->msft_data; int err = 0; - if (!msft) - return -EOPNOTSUPP; - - if (msft->reregistering) - return -EBUSY; - handle_data = msft_find_handle_data(hdev, monitor->handle, true); /* If no matched handle, just remove without telling controller */ @@ -582,6 +691,21 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, return err; } +/* This function requires the caller holds hdev->lock */ +int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, + u16 handle) +{ + struct msft_data *msft = hdev->msft_data; + + if (!msft) + return -EOPNOTSUPP; + + if (msft->reregistering || msft->suspending) + return -EBUSY; + + return __msft_remove_monitor(hdev, monitor, handle); +} + void msft_req_add_set_filter_enable(struct hci_request *req, bool enable) { struct hci_dev *hdev = req->hdev; diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index 6e56d94b88d8..59c6e081c789 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -13,6 +13,8 @@ #if IS_ENABLED(CONFIG_BT_MSFTEXT) bool msft_monitor_supported(struct hci_dev *hdev); +void msft_register(struct hci_dev *hdev); +void msft_unregister(struct hci_dev *hdev); void msft_do_open(struct hci_dev *hdev); void msft_do_close(struct hci_dev *hdev); void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb); @@ -22,6 +24,8 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, u16 handle); void msft_req_add_set_filter_enable(struct hci_request *req, bool enable); int msft_set_filter_enable(struct hci_dev *hdev, bool enable); +void msft_suspend(struct hci_dev *hdev); +void msft_resume(struct hci_dev *hdev); bool msft_curve_validity(struct hci_dev *hdev); #else @@ -31,6 +35,8 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev) return false; } +static inline void msft_register(struct hci_dev *hdev) {} +static inline void msft_unregister(struct hci_dev *hdev) {} static inline void msft_do_open(struct hci_dev *hdev) {} static inline void msft_do_close(struct hci_dev *hdev) {} static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {} @@ -55,6 +61,9 @@ static inline int msft_set_filter_enable(struct hci_dev *hdev, bool enable) return -EOPNOTSUPP; } +static inline void msft_suspend(struct hci_dev *hdev) {} +static inline void msft_resume(struct hci_dev *hdev) {} + static inline bool msft_curve_validity(struct hci_dev *hdev) { return false; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index f2bacb464ccf..7324764384b6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -549,22 +549,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel) return dlc; } +static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag) +{ + int len = frag->len; + + BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); + + if (len > d->mtu) + return -EINVAL; + + rfcomm_make_uih(frag, d->addr); + __skb_queue_tail(&d->tx_queue, frag); + + return len; +} + int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) { - int len = skb->len; + unsigned long flags; + struct sk_buff *frag, *next; + int len; if (d->state != BT_CONNECTED) return -ENOTCONN; - BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); + frag = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; - if (len > d->mtu) - return -EINVAL; + /* Queue all fragments atomically. */ + spin_lock_irqsave(&d->tx_queue.lock, flags); - rfcomm_make_uih(skb, d->addr); - skb_queue_tail(&d->tx_queue, skb); + len = rfcomm_dlc_send_frag(d, skb); + if (len < 0 || !frag) + goto unlock; + + for (; frag; frag = next) { + int ret; + + next = frag->next; + + ret = rfcomm_dlc_send_frag(d, frag); + if (ret < 0) { + kfree_skb(frag); + goto unlock; + } + + len += ret; + } + +unlock: + spin_unlock_irqrestore(&d->tx_queue.lock, flags); - if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) + if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags)) rfcomm_schedule(); return len; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 2c95bb58f901..4bf4ea6cbb5e 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -575,46 +575,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); - if (sent) - goto done; - - while (len) { - size_t size = min_t(size_t, len, d->mtu); - int err; - - skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) { - if (sent == 0) - sent = err; - break; - } - skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } - skb->priority = sk->sk_priority; + release_sock(sk); - err = rfcomm_dlc_send(d, skb); - if (err < 0) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } + if (sent) + return sent; - sent += size; - len -= size; - } + skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, + RFCOMM_SKB_TAIL_RESERVE); + if (IS_ERR(skb)) + return PTR_ERR(skb); -done: - release_sock(sk); + sent = rfcomm_dlc_send(d, skb); + if (sent < 0) + kfree_skb(skb); return sent; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 98a881586512..8eabf41b2993 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -69,6 +69,7 @@ struct sco_pinfo { __u32 flags; __u16 setting; __u8 cmsg_mask; + struct bt_codec codec; struct sco_conn *conn; }; @@ -133,6 +134,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) return NULL; spin_lock_init(&conn->lock); + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); hcon->sco_data = conn; conn->hcon = hcon; @@ -187,20 +189,21 @@ static void sco_conn_del(struct hci_conn *hcon, int err) /* Kill socket */ sco_conn_lock(conn); sk = conn->sk; + if (sk) + sock_hold(sk); sco_conn_unlock(conn); if (sk) { - sock_hold(sk); lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); release_sock(sk); sock_put(sk); - - /* Ensure no more work items will run before freeing conn. */ - cancel_delayed_work_sync(&conn->timeout_work); } + /* Ensure no more work items will run before freeing conn. */ + cancel_delayed_work_sync(&conn->timeout_work); + hcon->sco_data = NULL; kfree(conn); } @@ -213,8 +216,6 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, sco_pi(sk)->conn = conn; conn->sk = sk; - INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); - if (parent) bt_accept_enqueue(parent, sk, true); } @@ -252,7 +253,7 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) return -EOPNOTSUPP; hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, - sco_pi(sk)->setting); + sco_pi(sk)->setting, &sco_pi(sk)->codec); if (IS_ERR(hcon)) return PTR_ERR(hcon); @@ -280,11 +281,10 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) return err; } -static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) +static int sco_send_frame(struct sock *sk, struct sk_buff *skb) { struct sco_conn *conn = sco_pi(sk)->conn; - struct sk_buff *skb; - int err; + int len = skb->len; /* Check outgoing MTU */ if (len > conn->mtu) @@ -292,15 +292,6 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) BT_DBG("sk %p len %d", sk, len); - skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return err; - - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - kfree_skb(skb); - return -EFAULT; - } - hci_send_sco(conn->hcon, skb); return len; @@ -444,6 +435,7 @@ static void __sco_sock_close(struct sock *sk) sock_set_flag(sk, SOCK_ZAPPED); break; } + } /* Must be called on unlocked socket. */ @@ -504,6 +496,10 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, sk->sk_state = BT_OPEN; sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; + sco_pi(sk)->codec.id = BT_CODEC_CVSD; + sco_pi(sk)->codec.cid = 0xffff; + sco_pi(sk)->codec.vid = 0xffff; + sco_pi(sk)->codec.data_path = 0x00; bt_sock_link(&sco_sk_list, sk); return sk; @@ -725,6 +721,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; + struct sk_buff *skb; int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -736,14 +733,21 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); + if (IS_ERR(skb)) + return PTR_ERR(skb); + lock_sock(sk); if (sk->sk_state == BT_CONNECTED) - err = sco_send_frame(sk, msg, len); + err = sco_send_frame(sk, skb); else err = -ENOTCONN; release_sock(sk); + + if (err < 0) + kfree_skb(skb); return err; } @@ -825,6 +829,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, int len, err = 0; struct bt_voice voice; u32 opt; + struct bt_codecs *codecs; + struct hci_dev *hdev; + __u8 buffer[255]; BT_DBG("sk %p", sk); @@ -872,6 +879,16 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, } sco_pi(sk)->setting = voice.setting; + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, + BDADDR_BREDR); + if (!hdev) { + err = -EBADFD; + break; + } + if (enhanced_sco_capable(hdev) && + voice.setting == BT_VOICE_TRANSPARENT) + sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT; + hci_dev_put(hdev); break; case BT_PKT_STATUS: @@ -886,6 +903,57 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS; break; + case BT_CODEC: + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND && + sk->sk_state != BT_CONNECT2) { + err = -EINVAL; + break; + } + + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, + BDADDR_BREDR); + if (!hdev) { + err = -EBADFD; + break; + } + + if (!hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) { + hci_dev_put(hdev); + err = -EOPNOTSUPP; + break; + } + + if (!hdev->get_data_path_id) { + hci_dev_put(hdev); + err = -EOPNOTSUPP; + break; + } + + if (optlen < sizeof(struct bt_codecs) || + optlen > sizeof(buffer)) { + hci_dev_put(hdev); + err = -EINVAL; + break; + } + + if (copy_from_sockptr(buffer, optval, optlen)) { + hci_dev_put(hdev); + err = -EFAULT; + break; + } + + codecs = (void *)buffer; + + if (codecs->num_codecs > 1) { + hci_dev_put(hdev); + err = -EINVAL; + break; + } + + sco_pi(sk)->codec = codecs->codecs[0]; + hci_dev_put(hdev); + break; + default: err = -ENOPROTOOPT; break; @@ -964,6 +1032,12 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, struct bt_voice voice; u32 phys; int pkt_status; + int buf_len; + struct codec_list *c; + u8 num_codecs, i, __user *ptr; + struct hci_dev *hdev; + struct hci_codec_caps *caps; + struct bt_codec codec; BT_DBG("sk %p", sk); @@ -1028,6 +1102,101 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, err = -EFAULT; break; + case BT_CODEC: + num_codecs = 0; + buf_len = 0; + + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); + if (!hdev) { + err = -EBADFD; + break; + } + + if (!hci_dev_test_flag(hdev, HCI_OFFLOAD_CODECS_ENABLED)) { + hci_dev_put(hdev); + err = -EOPNOTSUPP; + break; + } + + if (!hdev->get_data_path_id) { + hci_dev_put(hdev); + err = -EOPNOTSUPP; + break; + } + + /* find total buffer size required to copy codec + caps */ + hci_dev_lock(hdev); + list_for_each_entry(c, &hdev->local_codecs, list) { + if (c->transport != HCI_TRANSPORT_SCO_ESCO) + continue; + num_codecs++; + for (i = 0, caps = c->caps; i < c->num_caps; i++) { + buf_len += 1 + caps->len; + caps = (void *)&caps->data[caps->len]; + } + buf_len += sizeof(struct bt_codec); + } + hci_dev_unlock(hdev); + + buf_len += sizeof(struct bt_codecs); + if (buf_len > len) { + hci_dev_put(hdev); + err = -ENOBUFS; + break; + } + ptr = optval; + + if (put_user(num_codecs, ptr)) { + hci_dev_put(hdev); + err = -EFAULT; + break; + } + ptr += sizeof(num_codecs); + + /* Iterate all the codecs supported over SCO and populate + * codec data + */ + hci_dev_lock(hdev); + list_for_each_entry(c, &hdev->local_codecs, list) { + if (c->transport != HCI_TRANSPORT_SCO_ESCO) + continue; + + codec.id = c->id; + codec.cid = c->cid; + codec.vid = c->vid; + err = hdev->get_data_path_id(hdev, &codec.data_path); + if (err < 0) + break; + codec.num_caps = c->num_caps; + if (copy_to_user(ptr, &codec, sizeof(codec))) { + err = -EFAULT; + break; + } + ptr += sizeof(codec); + + /* find codec capabilities data length */ + len = 0; + for (i = 0, caps = c->caps; i < c->num_caps; i++) { + len += 1 + caps->len; + caps = (void *)&caps->data[caps->len]; + } + + /* copy codec capabilities data */ + if (len && copy_to_user(ptr, c->caps, len)) { + err = -EFAULT; + break; + } + ptr += len; + } + + if (!err && put_user(buf_len, optlen)) + err = -EFAULT; + + hci_dev_unlock(hdev); + hci_dev_put(hdev); + + break; + default: err = -ENOPROTOOPT; break; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index b5f4ef35357c..072f0c16c779 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -483,11 +483,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) return -EINVAL; /* priority is allowed */ - - if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority), - offsetof(struct __sk_buff, ifindex))) - return -EINVAL; - + /* ingress_ifindex is allowed */ /* ifindex is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex), @@ -511,11 +507,18 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* gso_size is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size), + offsetof(struct __sk_buff, hwtstamp))) + return -EINVAL; + + /* hwtstamp is allowed */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp), sizeof(struct __sk_buff))) return -EINVAL; skb->mark = __skb->mark; skb->priority = __skb->priority; + skb->skb_iif = __skb->ingress_ifindex; skb->tstamp = __skb->tstamp; memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); @@ -532,6 +535,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) return -EINVAL; skb_shinfo(skb)->gso_segs = __skb->gso_segs; skb_shinfo(skb)->gso_size = __skb->gso_size; + skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp; return 0; } @@ -545,11 +549,13 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) __skb->mark = skb->mark; __skb->priority = skb->priority; + __skb->ingress_ifindex = skb->skb_iif; __skb->ifindex = skb->dev->ifindex; __skb->tstamp = skb->tstamp; memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); __skb->wire_len = cb->pkt_len; __skb->gso_segs = skb_shinfo(skb)->gso_segs; + __skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp; } static struct proto bpf_dummy_proto = { @@ -801,7 +807,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (ret) goto free_data; - bpf_prog_change_xdp(NULL, prog); + if (repeat > 1) + bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); /* We convert the xdp_buff back to an xdp_md before checking the return * code so the reference count of any held netdevice will be decremented @@ -822,7 +829,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, sizeof(struct xdp_md)); out: - bpf_prog_change_xdp(prog, NULL); + if (repeat > 1) + bpf_prog_change_xdp(prog, NULL); free_data: kfree(data); free_ctx: diff --git a/net/bridge/br.c b/net/bridge/br.c index d3a32c6813e0..1fac72cc617f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -36,7 +36,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v bool changed_addr; int err; - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { err = br_vlan_bridge_event(dev, event, ptr); if (err) return notifier_from_errno(err); @@ -349,7 +349,7 @@ static void __net_exit br_net_exit(struct net *net) rtnl_lock(); for_each_netdev(net, dev) - if (dev->priv_flags & IFF_EBRIDGE) + if (netif_is_bridge_master(dev)) br_dev_delete(dev, &list); unregister_netdevice_many(&list); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 46812b659710..a6a68e18c70a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -825,7 +825,7 @@ int br_fdb_dump(struct sk_buff *skb, struct net_bridge_fdb_entry *f; int err = 0; - if (!(dev->priv_flags & IFF_EBRIDGE)) + if (!netif_is_bridge_master(dev)) return err; if (!filter_dev) { @@ -1076,7 +1076,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group(br); } else { @@ -1173,7 +1173,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_bridge *br; int err; - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group(br); } else { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4a02f8bb278a..c11bba3e7ec0 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -471,7 +471,7 @@ int br_del_bridge(struct net *net, const char *name) if (dev == NULL) ret = -ENXIO; /* Could not find device */ - else if (!(dev->priv_flags & IFF_EBRIDGE)) { + else if (!netif_is_bridge_master(dev)) { /* Attempt to delete non bridge device! */ ret = -EPERM; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 793b0db9d9a3..db4ab2c2ce18 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -26,7 +26,7 @@ static int get_bridge_ifindices(struct net *net, int *indices, int num) for_each_netdev_rcu(net, dev) { if (i >= num) break; - if (dev->priv_flags & IFF_EBRIDGE) + if (netif_is_bridge_master(dev)) indices[i++] = dev->ifindex; } rcu_read_unlock(); @@ -71,7 +71,8 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, num = br_fdb_fillbuf(br, buf, maxnum, offset); if (num > 0) { - if (copy_to_user(userbuf, buf, num*sizeof(struct __fdb_entry))) + if (copy_to_user(userbuf, buf, + array_size(num, sizeof(struct __fdb_entry)))) num = -EFAULT; } kfree(buf); @@ -188,7 +189,7 @@ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user return -ENOMEM; get_port_ifindices(br, indices, num); - if (copy_to_user(argp, indices, num * sizeof(int))) + if (copy_to_user(argp, indices, array_size(num, sizeof(int)))) num = -EFAULT; kfree(indices); return num; @@ -336,7 +337,8 @@ static int old_deviceless(struct net *net, void __user *uarg) args[2] = get_bridge_ifindices(net, indices, args[2]); - ret = copy_to_user(uarg, indices, args[2]*sizeof(int)) + ret = copy_to_user(uarg, indices, + array_size(args[2], sizeof(int))) ? -EFAULT : args[2]; kfree(indices); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 0281453f7766..61ccf46fcc21 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -422,7 +422,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; for_each_netdev_rcu(net, dev) { - if (dev->priv_flags & IFF_EBRIDGE) { + if (netif_is_bridge_master(dev)) { struct net_bridge *br = netdev_priv(dev); struct br_port_msg *bpm; @@ -1016,7 +1016,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (!(dev->priv_flags & IFF_EBRIDGE)) { + if (!netif_is_bridge_master(dev)) { NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge"); return -EOPNOTSUPP; } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8edfb98ae1d5..b5af68c105a8 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -968,7 +968,7 @@ static int brnf_device_event(struct notifier_block *unused, unsigned long event, struct net *net; int ret; - if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE)) + if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev)) return NOTIFY_DONE; ASSERT_RTNL(); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5c6c4305ed23..0c8b5f1a15bc 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -106,7 +106,7 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, p = br_port_get_check_rcu(dev); if (p) vg = nbp_vlan_group_rcu(p); - } else if (dev->priv_flags & IFF_EBRIDGE) { + } else if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group_rcu(br); } @@ -1050,7 +1050,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) p = br_port_get_rtnl(dev); /* We want to accept dev as bridge itself as well */ - if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + if (!p && !netif_is_bridge_master(dev)) return -EINVAL; err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index ba55851fe132..75204d36d7f9 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -233,7 +233,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN); memcpy(br->bridge_id.addr, addr, ETH_ALEN); - memcpy(br->dev->dev_addr, addr, ETH_ALEN); + eth_hw_addr_set(br->dev, addr); list_for_each_entry(p, &br->port_list, list) { if (ether_addr_equal(p->designated_bridge.addr, oldaddr)) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index a7af4eaff17d..1a11064f9990 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -66,7 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb, NFPROTO_BRIDGE, s->in, NULL, NULL, s->net, NULL); - ret = ebt_do_table(skb, &state, priv); + ret = ebt_do_table(priv, skb, &state); if (ret != NF_DROP) return ret; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index c0b121df4a9a..cb949436bc0e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -58,28 +58,21 @@ static const struct ebt_table frame_filter = { .me = THIS_MODULE, }; -static unsigned int -ebt_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ebt_do_table(skb, state, priv); -} - static const struct nf_hook_ops ebt_ops_filter[] = { { - .hook = ebt_filter_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_filter_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_filter_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_FILTER_OTHER, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 4078151c224f..5ee0531ae506 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -58,27 +58,21 @@ static const struct ebt_table frame_nat = { .me = THIS_MODULE, }; -static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ebt_do_table(skb, state, priv); -} - static const struct nf_hook_ops ebt_ops_nat[] = { { - .hook = ebt_nat_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { - .hook = ebt_nat_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { - .hook = ebt_nat_hook, + .hook = ebt_do_table, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ba045f35114d..460d3064dc15 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -189,10 +189,10 @@ ebt_get_target_c(const struct ebt_entry *e) } /* Do some firewalling */ -unsigned int ebt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct ebt_table *table) +unsigned int ebt_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { + struct ebt_table *table = priv; unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; diff --git a/net/core/Makefile b/net/core/Makefile index 35ced6201814..4268846f2f47 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -36,3 +36,4 @@ obj-$(CONFIG_FAILOVER) += failover.o obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o +obj-$(CONFIG_OF) += of_net.o diff --git a/net/core/dev.c b/net/core/dev.c index 7ee9fecd3aff..4e3d19a06de4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -140,7 +140,7 @@ #include <linux/if_macvlan.h> #include <linux/errqueue.h> #include <linux/hrtimer.h> -#include <linux/netfilter_ingress.h> +#include <linux/netfilter_netdev.h> #include <linux/crash_dump.h> #include <linux/sctp.h> #include <net/udp_tunnel.h> @@ -303,6 +303,12 @@ static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net, return NULL; } +bool netdev_name_in_use(struct net *net, const char *name) +{ + return netdev_name_node_lookup(net, name); +} +EXPORT_SYMBOL(netdev_name_in_use); + int netdev_name_node_alt_create(struct net_device *dev, const char *name) { struct netdev_name_node *name_node; @@ -1133,7 +1139,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) } snprintf(buf, IFNAMSIZ, name, i); - if (!__dev_get_by_name(net, buf)) + if (!netdev_name_in_use(net, buf)) return i; /* It is possible to run out of possible slots @@ -1187,7 +1193,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev, if (strchr(name, '%')) return dev_alloc_name_ns(net, dev, name); - else if (__dev_get_by_name(net, name)) + else if (netdev_name_in_use(net, name)) return -EEXIST; else if (dev->name != name) strlcpy(dev->name, name, IFNAMSIZ); @@ -1290,8 +1296,8 @@ rollback: old_assign_type = NET_NAME_RENAMED; goto rollback; } else { - pr_err("%s: name change rollback failed: %d\n", - dev->name, ret); + netdev_err(dev, "name change rollback failed: %d\n", + ret); } } @@ -2345,7 +2351,7 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) /* If TC0 is invalidated disable TC mapping */ if (tc->offset + tc->count > txq) { - pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); + netdev_warn(dev, "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); dev->num_tc = 0; return; } @@ -2356,8 +2362,8 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) tc = &dev->tc_to_txq[q]; if (tc->offset + tc->count > txq) { - pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", - i, q); + netdev_warn(dev, "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", + i, q); netdev_set_prio_tc_map(dev, i, 0); } } @@ -2921,6 +2927,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); + dev_qdisc_change_real_num_tx(dev, txq); + dev->real_num_tx_queues = txq; if (disabling) { @@ -3408,7 +3416,7 @@ EXPORT_SYMBOL(__skb_gso_segment); #ifdef CONFIG_BUG static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { - pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>"); + netdev_err(dev, "hw csum failure\n"); skb_dump(KERN_ERR, skb, true); dump_stack(); } @@ -3918,6 +3926,7 @@ EXPORT_SYMBOL(dev_loopback_xmit); static struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { +#ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; @@ -3953,6 +3962,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) default: break; } +#endif /* CONFIG_NET_CLS_ACT */ return skb; } @@ -4146,13 +4156,20 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) qdisc_pkt_len_init(skb); #ifdef CONFIG_NET_CLS_ACT skb->tc_at_ingress = 0; -# ifdef CONFIG_NET_EGRESS +#endif +#ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { + if (nf_hook_egress_active()) { + skb = nf_hook_egress(skb, &rc, dev); + if (!skb) + goto out; + } + nf_skip_egress(skb, true); skb = sch_handle_egress(skb, &rc, dev); if (!skb) goto out; + nf_skip_egress(skb, false); } -# endif #endif /* If device/qdisc don't need skb->dst, release it right now while * its hot in this cpu cache. @@ -5294,6 +5311,7 @@ skip_taps: if (static_branch_unlikely(&ingress_needed_key)) { bool another = false; + nf_skip_egress(skb, true); skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev, &another); if (another) @@ -5301,6 +5319,7 @@ skip_taps: if (!skb) goto out; + nf_skip_egress(skb, false); if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) goto out; } @@ -5837,7 +5856,7 @@ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int se gro_normal_list(napi); } -static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) +static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { struct packet_offload *ptype; __be16 type = skb->protocol; @@ -5866,12 +5885,11 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) if (err) { WARN_ON(&ptype->list == head); kfree_skb(skb); - return NET_RX_SUCCESS; + return; } out: gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); - return NET_RX_SUCCESS; } static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, @@ -6898,19 +6916,25 @@ EXPORT_SYMBOL(netif_napi_add); void napi_disable(struct napi_struct *n) { + unsigned long val, new; + might_sleep(); set_bit(NAPI_STATE_DISABLE, &n->state); - while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) - msleep(1); - while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state)) - msleep(1); + do { + val = READ_ONCE(n->state); + if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { + usleep_range(20, 200); + continue; + } + + new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; + new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); + } while (cmpxchg(&n->state, val, new) != val); hrtimer_cancel(&n->timer); - clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); clear_bit(NAPI_STATE_DISABLE, &n->state); - clear_bit(NAPI_STATE_THREADED, &n->state); } EXPORT_SYMBOL(napi_disable); @@ -6988,8 +7012,8 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) } if (unlikely(work > weight)) - pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", - n->poll, work, weight); + netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n", + n->poll, work, weight); if (likely(work < weight)) return work; @@ -8543,8 +8567,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) dev->flags &= ~IFF_PROMISC; else { dev->promiscuity -= inc; - pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", - dev->name); + netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n"); return -EOVERFLOW; } } @@ -8614,8 +8637,7 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) dev->flags &= ~IFF_ALLMULTI; else { dev->allmulti -= inc; - pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", - dev->name); + netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n"); return -EOVERFLOW; } } @@ -9152,14 +9174,11 @@ int dev_get_port_parent_id(struct net_device *dev, } err = devlink_compat_switch_id_get(dev, ppid); - if (!err || err != -EOPNOTSUPP) + if (!recurse || err != -EOPNOTSUPP) return err; - if (!recurse) - return -EOPNOTSUPP; - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = dev_get_port_parent_id(lower_dev, ppid, recurse); + err = dev_get_port_parent_id(lower_dev, ppid, true); if (err) break; if (!first.id_len) @@ -10860,7 +10879,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; - nf_hook_ingress_init(dev); + nf_hook_netdev_init(dev); return dev; @@ -11146,7 +11165,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, * we can use it in the destination network namespace. */ err = -EEXIST; - if (__dev_get_by_name(net, dev->name)) { + if (netdev_name_in_use(net, dev->name)) { /* We get here if we can't use the current device name */ if (!pat) goto out; @@ -11499,7 +11518,7 @@ static void __net_exit default_device_exit(struct net *net) /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); - if (__dev_get_by_name(&init_net, fb_name)) + if (netdev_name_in_use(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { diff --git a/net/core/devlink.c b/net/core/devlink.c index a856ae401ea5..3464854015a2 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -30,6 +30,63 @@ #define CREATE_TRACE_POINTS #include <trace/events/devlink.h> +#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ + (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) + +struct devlink_dev_stats { + u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; + u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; +}; + +struct devlink { + u32 index; + struct list_head port_list; + struct list_head rate_list; + struct list_head sb_list; + struct list_head dpipe_table_list; + struct list_head resource_list; + struct list_head param_list; + struct list_head region_list; + struct list_head reporter_list; + struct mutex reporters_lock; /* protects reporter_list */ + struct devlink_dpipe_headers *dpipe_headers; + struct list_head trap_list; + struct list_head trap_group_list; + struct list_head trap_policer_list; + const struct devlink_ops *ops; + u64 features; + struct xarray snapshot_ids; + struct devlink_dev_stats stats; + struct device *dev; + possible_net_t _net; + /* Serializes access to devlink instance specific objects such as + * port, sb, dpipe, resource, params, region, traps and more. + */ + struct mutex lock; + u8 reload_failed:1; + refcount_t refcount; + struct completion comp; + char priv[0] __aligned(NETDEV_ALIGN); +}; + +void *devlink_priv(struct devlink *devlink) +{ + return &devlink->priv; +} +EXPORT_SYMBOL_GPL(devlink_priv); + +struct devlink *priv_to_devlink(void *priv) +{ + return container_of(priv, struct devlink, priv); +} +EXPORT_SYMBOL_GPL(priv_to_devlink); + +struct device *devlink_to_dev(const struct devlink *devlink) +{ + return devlink->dev; +} +EXPORT_SYMBOL_GPL(devlink_to_dev); + static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = { { .name = "destination mac", @@ -95,6 +152,22 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); #define DEVLINK_REGISTERED XA_MARK_1 +/* devlink instances are open to the access from the user space after + * devlink_register() call. Such logical barrier allows us to have certain + * expectations related to locking. + * + * Before *_register() - we are in initialization stage and no parallel + * access possible to the devlink instance. All drivers perform that phase + * by implicitly holding device_lock. + * + * After *_register() - users and driver can access devlink instance at + * the same time. + */ +#define ASSERT_DEVLINK_REGISTERED(d) \ + WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) +#define ASSERT_DEVLINK_NOT_REGISTERED(d) \ + WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) + /* devlink_mutex * * An overall lock guarding every operation coming from userspace. @@ -742,6 +815,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); + WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -1040,11 +1114,15 @@ nla_put_failure: static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { + struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -1055,19 +1133,22 @@ static void devlink_port_notify(struct devlink_port *devlink_port, return; } - genlmsg_multicast_netns(&devlink_nl_family, - devlink_net(devlink_port->devlink), msg, 0, - DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, + 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static void devlink_rate_notify(struct devlink_rate *devlink_rate, enum devlink_command cmd) { + struct devlink *devlink = devlink_rate->devlink; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -1078,9 +1159,8 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, return; } - genlmsg_multicast_netns(&devlink_nl_family, - devlink_net(devlink_rate->devlink), msg, 0, - DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, + 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, @@ -3952,9 +4032,6 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, struct net *curr_net; int err; - if (!devlink->reload_enabled) - return -EOPNOTSUPP; - memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); @@ -4022,7 +4099,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) u32 actions_performed; int err; - if (!devlink_reload_supported(devlink->ops)) + if (!(devlink->features & DEVLINK_F_RELOAD)) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -4150,6 +4227,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink, WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE && cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); + WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -5070,6 +5148,11 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink, if (err) goto nla_put_failure; + err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, + region->max_snapshots); + if (err) + goto nla_put_failure; + err = devlink_nl_region_snapshots_id_put(msg, devlink, region); if (err) goto nla_put_failure; @@ -5145,17 +5228,19 @@ static void devlink_nl_region_notify(struct devlink_region *region, struct devlink_snapshot *snapshot, enum devlink_command cmd) { + struct devlink *devlink = region->devlink; struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0); if (IS_ERR(msg)) return; - genlmsg_multicast_netns(&devlink_nl_family, - devlink_net(region->devlink), msg, 0, - DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, + 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } /** @@ -6269,23 +6354,21 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, return 0; } -int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) { if (fmsg->putting_binary) return -EINVAL; return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); } -EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put); -int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) { if (fmsg->putting_binary) return -EINVAL; return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); } -EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put); int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) { @@ -6296,14 +6379,13 @@ int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); -int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) { if (fmsg->putting_binary) return -EINVAL; return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); } -EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put); int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) { @@ -6923,10 +7005,12 @@ genlmsg_cancel: static void devlink_recover_notify(struct devlink_health_reporter *reporter, enum devlink_command cmd) { + struct devlink *devlink = reporter->devlink; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); + WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -6938,9 +7022,8 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, return; } - genlmsg_multicast_netns(&devlink_nl_family, - devlink_net(reporter->devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, + 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } void @@ -8900,6 +8983,25 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) } /** + * devlink_set_features - Set devlink supported features + * + * @devlink: devlink + * @features: devlink support features + * + * This interface allows us to set reload ops separatelly from + * the devlink_alloc. + */ +void devlink_set_features(struct devlink *devlink, u64 features) +{ + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + + WARN_ON(features & DEVLINK_F_RELOAD && + !devlink_reload_supported(devlink->ops)); + devlink->features = features; +} +EXPORT_SYMBOL_GPL(devlink_set_features); + +/** * devlink_alloc_ns - Allocate new devlink instance resources * in specific namespace * @@ -8958,18 +9060,98 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, } EXPORT_SYMBOL_GPL(devlink_alloc_ns); +static void +devlink_trap_policer_notify(struct devlink *devlink, + const struct devlink_trap_policer_item *policer_item, + enum devlink_command cmd); +static void +devlink_trap_group_notify(struct devlink *devlink, + const struct devlink_trap_group_item *group_item, + enum devlink_command cmd); +static void devlink_trap_notify(struct devlink *devlink, + const struct devlink_trap_item *trap_item, + enum devlink_command cmd); + +static void devlink_notify_register(struct devlink *devlink) +{ + struct devlink_trap_policer_item *policer_item; + struct devlink_trap_group_item *group_item; + struct devlink_trap_item *trap_item; + struct devlink_port *devlink_port; + struct devlink_rate *rate_node; + struct devlink_region *region; + + devlink_notify(devlink, DEVLINK_CMD_NEW); + list_for_each_entry(devlink_port, &devlink->port_list, list) + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + + list_for_each_entry(policer_item, &devlink->trap_policer_list, list) + devlink_trap_policer_notify(devlink, policer_item, + DEVLINK_CMD_TRAP_POLICER_NEW); + + list_for_each_entry(group_item, &devlink->trap_group_list, list) + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_NEW); + + list_for_each_entry(trap_item, &devlink->trap_list, list) + devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW); + + list_for_each_entry(rate_node, &devlink->rate_list, list) + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); + + list_for_each_entry(region, &devlink->region_list, list) + devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); + + devlink_params_publish(devlink); +} + +static void devlink_notify_unregister(struct devlink *devlink) +{ + struct devlink_trap_policer_item *policer_item; + struct devlink_trap_group_item *group_item; + struct devlink_trap_item *trap_item; + struct devlink_port *devlink_port; + struct devlink_rate *rate_node; + struct devlink_region *region; + + devlink_params_unpublish(devlink); + + list_for_each_entry_reverse(region, &devlink->region_list, list) + devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL); + + list_for_each_entry_reverse(rate_node, &devlink->rate_list, list) + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL); + + list_for_each_entry_reverse(trap_item, &devlink->trap_list, list) + devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL); + + list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list) + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_DEL); + list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list, + list) + devlink_trap_policer_notify(devlink, policer_item, + DEVLINK_CMD_TRAP_POLICER_DEL); + + list_for_each_entry_reverse(devlink_port, &devlink->port_list, list) + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); + devlink_notify(devlink, DEVLINK_CMD_DEL); +} + /** * devlink_register - Register devlink instance * * @devlink: devlink */ -int devlink_register(struct devlink *devlink) +void devlink_register(struct devlink *devlink) { + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + /* Make sure that we are in .probe() routine */ + mutex_lock(&devlink_mutex); xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); - devlink_notify(devlink, DEVLINK_CMD_NEW); + devlink_notify_register(devlink); mutex_unlock(&devlink_mutex); - return 0; } EXPORT_SYMBOL_GPL(devlink_register); @@ -8980,60 +9162,28 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { + ASSERT_DEVLINK_REGISTERED(devlink); + /* Make sure that we are in .remove() routine */ + devlink_put(devlink); wait_for_completion(&devlink->comp); mutex_lock(&devlink_mutex); - WARN_ON(devlink_reload_supported(devlink->ops) && - devlink->reload_enabled); - devlink_notify(devlink, DEVLINK_CMD_DEL); + devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); /** - * devlink_reload_enable - Enable reload of devlink instance - * - * @devlink: devlink - * - * Should be called at end of device initialization - * process when reload operation is supported. - */ -void devlink_reload_enable(struct devlink *devlink) -{ - mutex_lock(&devlink_mutex); - devlink->reload_enabled = true; - mutex_unlock(&devlink_mutex); -} -EXPORT_SYMBOL_GPL(devlink_reload_enable); - -/** - * devlink_reload_disable - Disable reload of devlink instance - * - * @devlink: devlink - * - * Should be called at the beginning of device cleanup - * process when reload operation is supported. - */ -void devlink_reload_disable(struct devlink *devlink) -{ - mutex_lock(&devlink_mutex); - /* Mutex is taken which ensures that no reload operation is in - * progress while setting up forbidded flag. - */ - devlink->reload_enabled = false; - mutex_unlock(&devlink_mutex); -} -EXPORT_SYMBOL_GPL(devlink_reload_disable); - -/** * devlink_free - Free devlink instance resources * * @devlink: devlink */ void devlink_free(struct devlink *devlink) { + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->trap_policer_list)); @@ -10090,6 +10240,9 @@ void devlink_params_publish(struct devlink *devlink) { struct devlink_param_item *param_item; + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; + list_for_each_entry(param_item, &devlink->param_list, list) { if (param_item->published) continue; @@ -10122,102 +10275,25 @@ void devlink_params_unpublish(struct devlink *devlink) EXPORT_SYMBOL_GPL(devlink_params_unpublish); /** - * devlink_param_publish - publish one configuration parameter - * - * @devlink: devlink - * @param: one configuration parameter - * - * Publish previously registered configuration parameter. - */ -void devlink_param_publish(struct devlink *devlink, - const struct devlink_param *param) -{ - struct devlink_param_item *param_item; - - list_for_each_entry(param_item, &devlink->param_list, list) { - if (param_item->param != param || param_item->published) - continue; - param_item->published = true; - devlink_param_notify(devlink, 0, param_item, - DEVLINK_CMD_PARAM_NEW); - break; - } -} -EXPORT_SYMBOL_GPL(devlink_param_publish); - -/** - * devlink_param_unpublish - unpublish one configuration parameter + * devlink_param_driverinit_value_get - get configuration parameter + * value for driver initializing * - * @devlink: devlink - * @param: one configuration parameter + * @devlink: devlink + * @param_id: parameter ID + * @init_val: value of parameter in driverinit configuration mode * - * Unpublish previously registered configuration parameter. + * This function should be used by the driver to get driverinit + * configuration for initialization after reload command. */ -void devlink_param_unpublish(struct devlink *devlink, - const struct devlink_param *param) +int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, + union devlink_param_value *init_val) { struct devlink_param_item *param_item; - list_for_each_entry(param_item, &devlink->param_list, list) { - if (param_item->param != param || !param_item->published) - continue; - param_item->published = false; - devlink_param_notify(devlink, 0, param_item, - DEVLINK_CMD_PARAM_DEL); - break; - } -} -EXPORT_SYMBOL_GPL(devlink_param_unpublish); - -/** - * devlink_port_params_register - register port configuration parameters - * - * @devlink_port: devlink port - * @params: configuration parameters array - * @params_count: number of parameters provided - * - * Register the configuration parameters supported by the port. - */ -int devlink_port_params_register(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ - return __devlink_params_register(devlink_port->devlink, - devlink_port->index, - &devlink_port->param_list, params, - params_count, - DEVLINK_CMD_PORT_PARAM_NEW, - DEVLINK_CMD_PORT_PARAM_DEL); -} -EXPORT_SYMBOL_GPL(devlink_port_params_register); - -/** - * devlink_port_params_unregister - unregister port configuration - * parameters - * - * @devlink_port: devlink port - * @params: configuration parameters array - * @params_count: number of parameters provided - */ -void devlink_port_params_unregister(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ - return __devlink_params_unregister(devlink_port->devlink, - devlink_port->index, - &devlink_port->param_list, - params, params_count, - DEVLINK_CMD_PORT_PARAM_DEL); -} -EXPORT_SYMBOL_GPL(devlink_port_params_unregister); - -static int -__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id, - union devlink_param_value *init_val) -{ - struct devlink_param_item *param_item; + if (!devlink_reload_supported(devlink->ops)) + return -EOPNOTSUPP; - param_item = devlink_param_find_by_id(param_list, param_id); + param_item = devlink_param_find_by_id(&devlink->param_list, param_id); if (!param_item) return -EINVAL; @@ -10233,54 +10309,6 @@ __devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id, return 0; } - -static int -__devlink_param_driverinit_value_set(struct devlink *devlink, - unsigned int port_index, - struct list_head *param_list, u32 param_id, - union devlink_param_value init_val, - enum devlink_command cmd) -{ - struct devlink_param_item *param_item; - - param_item = devlink_param_find_by_id(param_list, param_id); - if (!param_item) - return -EINVAL; - - if (!devlink_param_cmode_is_supported(param_item->param, - DEVLINK_PARAM_CMODE_DRIVERINIT)) - return -EOPNOTSUPP; - - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(param_item->driverinit_value.vstr, init_val.vstr); - else - param_item->driverinit_value = init_val; - param_item->driverinit_value_valid = true; - - devlink_param_notify(devlink, port_index, param_item, cmd); - return 0; -} - -/** - * devlink_param_driverinit_value_get - get configuration parameter - * value for driver initializing - * - * @devlink: devlink - * @param_id: parameter ID - * @init_val: value of parameter in driverinit configuration mode - * - * This function should be used by the driver to get driverinit - * configuration for initialization after reload command. - */ -int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val) -{ - if (!devlink_reload_supported(devlink->ops)) - return -EOPNOTSUPP; - - return __devlink_param_driverinit_value_get(&devlink->param_list, - param_id, init_val); -} EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); /** @@ -10298,61 +10326,26 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val) { - return __devlink_param_driverinit_value_set(devlink, 0, - &devlink->param_list, - param_id, init_val, - DEVLINK_CMD_PARAM_NEW); -} -EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); + struct devlink_param_item *param_item; -/** - * devlink_port_param_driverinit_value_get - get configuration parameter - * value for driver initializing - * - * @devlink_port: devlink_port - * @param_id: parameter ID - * @init_val: value of parameter in driverinit configuration mode - * - * This function should be used by the driver to get driverinit - * configuration for initialization after reload command. - */ -int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value *init_val) -{ - struct devlink *devlink = devlink_port->devlink; + param_item = devlink_param_find_by_id(&devlink->param_list, param_id); + if (!param_item) + return -EINVAL; - if (!devlink_reload_supported(devlink->ops)) + if (!devlink_param_cmode_is_supported(param_item->param, + DEVLINK_PARAM_CMODE_DRIVERINIT)) return -EOPNOTSUPP; - return __devlink_param_driverinit_value_get(&devlink_port->param_list, - param_id, init_val); -} -EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get); + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; + param_item->driverinit_value_valid = true; -/** - * devlink_port_param_driverinit_value_set - set value of configuration - * parameter for driverinit - * configuration mode - * - * @devlink_port: devlink_port - * @param_id: parameter ID - * @init_val: value of parameter to set for driverinit configuration mode - * - * This function should be used by the driver to set driverinit - * configuration mode default value. - */ -int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value init_val) -{ - return __devlink_param_driverinit_value_set(devlink_port->devlink, - devlink_port->index, - &devlink_port->param_list, - param_id, init_val, - DEVLINK_CMD_PORT_PARAM_NEW); + devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); + return 0; } -EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set); +EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); /** * devlink_param_value_changed - notify devlink on a parameter's value @@ -10378,50 +10371,6 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) EXPORT_SYMBOL_GPL(devlink_param_value_changed); /** - * devlink_port_param_value_changed - notify devlink on a parameter's value - * change. Should be called by the driver - * right after the change. - * - * @devlink_port: devlink_port - * @param_id: parameter ID - * - * This function should be used by the driver to notify devlink on value - * change, excluding driverinit configuration mode. - * For driverinit configuration mode driver should use the function - * devlink_port_param_driverinit_value_set() instead. - */ -void devlink_port_param_value_changed(struct devlink_port *devlink_port, - u32 param_id) -{ - struct devlink_param_item *param_item; - - param_item = devlink_param_find_by_id(&devlink_port->param_list, - param_id); - WARN_ON(!param_item); - - devlink_param_notify(devlink_port->devlink, devlink_port->index, - param_item, DEVLINK_CMD_PORT_PARAM_NEW); -} -EXPORT_SYMBOL_GPL(devlink_port_param_value_changed); - -/** - * devlink_param_value_str_fill - Safely fill-up the string preventing - * from overflow of the preallocated buffer - * - * @dst_val: destination devlink_param_value - * @src: source buffer - */ -void devlink_param_value_str_fill(union devlink_param_value *dst_val, - const char *src) -{ - size_t len; - - len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE); - WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE); -} -EXPORT_SYMBOL_GPL(devlink_param_value_str_fill); - -/** * devlink_region_create - create a new address region * * @devlink: devlink @@ -10839,6 +10788,8 @@ devlink_trap_group_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && cmd != DEVLINK_CMD_TRAP_GROUP_DEL); + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -10880,6 +10831,8 @@ static void devlink_trap_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && cmd != DEVLINK_CMD_TRAP_DEL); + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -11261,6 +11214,8 @@ devlink_trap_policer_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW && cmd != DEVLINK_CMD_TRAP_POLICER_DEL); + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -11429,6 +11384,24 @@ free_msg: nlmsg_free(msg); } +static struct devlink_port *netdev_to_devlink_port(struct net_device *dev) +{ + if (!dev->netdev_ops->ndo_get_devlink_port) + return NULL; + + return dev->netdev_ops->ndo_get_devlink_port(dev); +} + +static struct devlink *netdev_to_devlink(struct net_device *dev) +{ + struct devlink_port *devlink_port = netdev_to_devlink_port(dev); + + if (!devlink_port) + return NULL; + + return devlink_port->devlink; +} + void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { @@ -11538,7 +11511,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) if (!net_eq(devlink_net(devlink), net)) goto retry; - WARN_ON(!devlink_reload_supported(devlink->ops)); + WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, diff --git a/net/core/filter.c b/net/core/filter.c index 2e32cee2c469..4bace37a6a44 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7765,6 +7765,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type break; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; + case bpf_ctx_range(struct __sk_buff, hwtstamp): + if (type == BPF_WRITE || size != sizeof(__u64)) + return false; + break; case bpf_ctx_range(struct __sk_buff, tstamp): if (size != sizeof(__u64)) return false; @@ -7774,6 +7778,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type return false; info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; break; + case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1: + /* Explicitly prohibit access to padding in __sk_buff. */ + return false; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { @@ -7802,6 +7809,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): + case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } @@ -7872,6 +7880,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): + case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } @@ -8373,6 +8382,7 @@ static bool sk_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): + case bpf_ctx_range(struct __sk_buff, hwtstamp): return false; } @@ -8884,6 +8894,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct sk_buff, sk)); break; + case offsetof(struct __sk_buff, hwtstamp): + BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8); + BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0); + + insn = bpf_convert_shinfo_access(si, insn); + *insn++ = BPF_LDX_MEM(BPF_DW, + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + hwtstamps, 8, + target_size)); + break; } return insn - insn_buf; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index bac0184cf3de..7d0a9f84aaf7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1196,9 +1196,8 @@ proto_again: break; } - proto = hdr->proto; nhoff += PPPOE_SES_HLEN; - switch (proto) { + switch (hdr->proto) { case htons(PPP_IP): proto = htons(ETH_P_IP); fdret = FLOW_DISSECT_RET_PROTO_AGAIN; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 8e582e29a41e..4fcbdd71c59f 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -40,10 +40,10 @@ */ struct net_rate_estimator { - struct gnet_stats_basic_packed *bstats; + struct gnet_stats_basic_sync *bstats; spinlock_t *stats_lock; - seqcount_t *running; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; + bool running; + struct gnet_stats_basic_sync __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ @@ -60,13 +60,13 @@ struct net_rate_estimator { }; static void est_fetch_counters(struct net_rate_estimator *e, - struct gnet_stats_basic_packed *b) + struct gnet_stats_basic_sync *b) { - memset(b, 0, sizeof(*b)); + gnet_stats_basic_sync_init(b); if (e->stats_lock) spin_lock(e->stats_lock); - __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats); + gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running); if (e->stats_lock) spin_unlock(e->stats_lock); @@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e, static void est_timer(struct timer_list *t) { struct net_rate_estimator *est = from_timer(est, t, timer); - struct gnet_stats_basic_packed b; + struct gnet_stats_basic_sync b; + u64 b_bytes, b_packets; u64 rate, brate; est_fetch_counters(est, &b); - brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log); + b_bytes = u64_stats_read(&b.bytes); + b_packets = u64_stats_read(&b.packets); + + brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log); brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); - rate = (b.packets - est->last_packets) << (10 - est->intvl_log); + rate = (b_packets - est->last_packets) << (10 - est->intvl_log); rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); write_seqcount_begin(&est->seq); @@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t) est->avpps += rate; write_seqcount_end(&est->seq); - est->last_bytes = b.bytes; - est->last_packets = b.packets; + est->last_bytes = b_bytes; + est->last_packets = b_packets; est->next_jiffies += ((HZ/4) << est->intvl_log); @@ -109,7 +113,9 @@ static void est_timer(struct timer_list *t) * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path - * @running: qdisc running seqcount + * @running: true if @bstats represents a running qdisc, thus @bstats' + * internal values might change during basic reads. Only used + * if @bstats_cpu is NULL * @opt: rate estimator configuration TLV * * Creates a new rate estimator with &bstats as source and &rate_est @@ -121,16 +127,16 @@ static void est_timer(struct timer_list *t) * Returns 0 on success or a negative error code. * */ -int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_new_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, + bool running, struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); struct net_rate_estimator *old, *est; - struct gnet_stats_basic_packed b; + struct gnet_stats_basic_sync b; int intvl_log; if (nla_len(opt) < sizeof(*parm)) @@ -164,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est_fetch_counters(est, &b); if (lock) local_bh_enable(); - est->last_bytes = b.bytes; - est->last_packets = b.packets; + est->last_bytes = u64_stats_read(&b.bytes); + est->last_packets = u64_stats_read(&b.packets); if (lock) spin_lock_bh(lock); @@ -214,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator); * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @lock: lock for statistics and control path - * @running: qdisc running seqcount (might be NULL) + * @running: true if @bstats represents a running qdisc, thus @bstats' + * internal values might change during basic reads. Only used + * if @cpu_bstats is NULL * @opt: rate estimator configuration TLV * * Replaces the configuration of a rate estimator by calling @@ -222,11 +230,11 @@ EXPORT_SYMBOL(gen_kill_estimator); * * Returns 0 on success or a negative error code. */ -int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu_bstats, +int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, - seqcount_t *running, struct nlattr *opt) + bool running, struct nlattr *opt) { return gen_new_estimator(bstats, cpu_bstats, rate_est, lock, running, opt); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index e491b083b348..a10335b4ba2d 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -18,7 +18,7 @@ #include <linux/gen_stats.h> #include <net/netlink.h> #include <net/gen_stats.h> - +#include <net/sch_generic.h> static inline int gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) @@ -114,63 +114,112 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, } EXPORT_SYMBOL(gnet_stats_start_copy); -static void -__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu) +/* Must not be inlined, due to u64_stats seqcount_t lockdep key */ +void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b) { + u64_stats_set(&b->bytes, 0); + u64_stats_set(&b->packets, 0); + u64_stats_init(&b->syncp); +} +EXPORT_SYMBOL(gnet_stats_basic_sync_init); + +static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu) +{ + u64 t_bytes = 0, t_packets = 0; int i; for_each_possible_cpu(i) { - struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i); + struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); unsigned int start; u64 bytes, packets; do { start = u64_stats_fetch_begin_irq(&bcpu->syncp); - bytes = bcpu->bstats.bytes; - packets = bcpu->bstats.packets; + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); - bstats->bytes += bytes; - bstats->packets += packets; + t_bytes += bytes; + t_packets += packets; + } + _bstats_update(bstats, t_bytes, t_packets); +} + +void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, bool running) +{ + unsigned int start; + u64 bytes = 0; + u64 packets = 0; + + WARN_ON_ONCE((cpu || running) && in_hardirq()); + + if (cpu) { + gnet_stats_add_basic_cpu(bstats, cpu); + return; } + do { + if (running) + start = u64_stats_fetch_begin_irq(&b->syncp); + bytes = u64_stats_read(&b->bytes); + packets = u64_stats_read(&b->packets); + } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); + + _bstats_update(bstats, bytes, packets); } +EXPORT_SYMBOL(gnet_stats_add_basic); -void -__gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b) +static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, bool running) { - unsigned int seq; + unsigned int start; if (cpu) { - __gnet_stats_copy_basic_cpu(bstats, cpu); + u64 t_bytes = 0, t_packets = 0; + int i; + + for_each_possible_cpu(i) { + struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); + unsigned int start; + u64 bytes, packets; + + do { + start = u64_stats_fetch_begin_irq(&bcpu->syncp); + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); + } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); + + t_bytes += bytes; + t_packets += packets; + } + *ret_bytes = t_bytes; + *ret_packets = t_packets; return; } do { if (running) - seq = read_seqcount_begin(running); - bstats->bytes = b->bytes; - bstats->packets = b->packets; - } while (running && read_seqcount_retry(running, seq)); + start = u64_stats_fetch_begin_irq(&b->syncp); + *ret_bytes = u64_stats_read(&b->bytes); + *ret_packets = u64_stats_read(&b->packets); + } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); } -EXPORT_SYMBOL(__gnet_stats_copy_basic); static int -___gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b, - int type) +___gnet_stats_copy_basic(struct gnet_dump *d, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, + int type, bool running) { - struct gnet_stats_basic_packed bstats = {0}; + u64 bstats_bytes, bstats_packets; - __gnet_stats_copy_basic(running, &bstats, cpu, b); + gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running); if (d->compat_tc_stats && type == TCA_STATS_BASIC) { - d->tc_stats.bytes = bstats.bytes; - d->tc_stats.packets = bstats.packets; + d->tc_stats.bytes = bstats_bytes; + d->tc_stats.packets = bstats_packets; } if (d->tail) { @@ -178,24 +227,28 @@ ___gnet_stats_copy_basic(const seqcount_t *running, int res; memset(&sb, 0, sizeof(sb)); - sb.bytes = bstats.bytes; - sb.packets = bstats.packets; + sb.bytes = bstats_bytes; + sb.packets = bstats_packets; res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD); - if (res < 0 || sb.packets == bstats.packets) + if (res < 0 || sb.packets == bstats_packets) return res; /* emit 64bit stats only if needed */ - return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets, - sizeof(bstats.packets), TCA_STATS_PAD); + return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets, + sizeof(bstats_packets), TCA_STATS_PAD); } return 0; } /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV - * @running: seqcount_t pointer * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics + * @running: true if @b represents a running qdisc, thus @b's + * internal values might change during basic reads. + * Only used if @cpu is NULL + * + * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). @@ -204,22 +257,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running, * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b) +gnet_stats_copy_basic(struct gnet_dump *d, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, + bool running) { - return ___gnet_stats_copy_basic(running, d, cpu, b, - TCA_STATS_BASIC); + return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running); } EXPORT_SYMBOL(gnet_stats_copy_basic); /** * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV - * @running: seqcount_t pointer * @d: dumping handle * @cpu: copy statistic per cpu * @b: basic statistics + * @running: true if @b represents a running qdisc, thus @b's + * internal values might change during basic reads. + * Only used if @cpu is NULL + * + * Context: task; must not be run from IRQ or BH contexts * * Appends the basic statistics to the top level TLV created by * gnet_stats_start_copy(). @@ -228,13 +284,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic_hw(const seqcount_t *running, - struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b) +gnet_stats_copy_basic_hw(struct gnet_dump *d, + struct gnet_stats_basic_sync __percpu *cpu, + struct gnet_stats_basic_sync *b, + bool running) { - return ___gnet_stats_copy_basic(running, d, cpu, b, - TCA_STATS_BASIC_HW); + return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running); } EXPORT_SYMBOL(gnet_stats_copy_basic_hw); @@ -282,16 +337,15 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, } EXPORT_SYMBOL(gnet_stats_copy_rate_est); -static void -__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *q) +static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *q) { int i; for_each_possible_cpu(i) { const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); - qstats->qlen = 0; + qstats->qlen += qcpu->backlog; qstats->backlog += qcpu->backlog; qstats->drops += qcpu->drops; qstats->requeues += qcpu->requeues; @@ -299,24 +353,21 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, } } -void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu, - const struct gnet_stats_queue *q, - __u32 qlen) +void gnet_stats_add_queue(struct gnet_stats_queue *qstats, + const struct gnet_stats_queue __percpu *cpu, + const struct gnet_stats_queue *q) { if (cpu) { - __gnet_stats_copy_queue_cpu(qstats, cpu); + gnet_stats_add_queue_cpu(qstats, cpu); } else { - qstats->qlen = q->qlen; - qstats->backlog = q->backlog; - qstats->drops = q->drops; - qstats->requeues = q->requeues; - qstats->overlimits = q->overlimits; + qstats->qlen += q->qlen; + qstats->backlog += q->backlog; + qstats->drops += q->drops; + qstats->requeues += q->requeues; + qstats->overlimits += q->overlimits; } - - qstats->qlen = qlen; } -EXPORT_SYMBOL(__gnet_stats_copy_queue); +EXPORT_SYMBOL(gnet_stats_add_queue); /** * gnet_stats_copy_queue - copy queue statistics into statistics TLV @@ -339,7 +390,8 @@ gnet_stats_copy_queue(struct gnet_dump *d, { struct gnet_stats_queue qstats = {0}; - __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen); + gnet_stats_add_queue(&qstats, cpu_q, q); + qstats.qlen = qlen; if (d->compat_tc_stats) { d->tc_stats.drops = qstats.drops; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2d5bc3a75fae..47931c8be04b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n) list_del_init(&n->gc_list); atomic_dec(&n->tbl->gc_entries); } + if (!list_empty(&n->managed_list)) + list_del_init(&n->managed_list); } static void neigh_update_gc_list(struct neighbour *n) @@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n) write_lock_bh(&n->tbl->lock); write_lock(&n->lock); - if (n->dead) goto out; @@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n) list_add_tail(&n->gc_list, &n->tbl->gc_list); atomic_inc(&n->tbl->gc_entries); } +out: + write_unlock(&n->lock); + write_unlock_bh(&n->tbl->lock); +} +static void neigh_update_managed_list(struct neighbour *n) +{ + bool on_managed_list, add_to_managed; + + write_lock_bh(&n->tbl->lock); + write_lock(&n->lock); + if (n->dead) + goto out; + + add_to_managed = n->flags & NTF_MANAGED; + on_managed_list = !list_empty(&n->managed_list); + + if (!add_to_managed && on_managed_list) + list_del_init(&n->managed_list); + else if (add_to_managed && !on_managed_list) + list_add_tail(&n->managed_list, &n->tbl->managed_list); out: write_unlock(&n->lock); write_unlock_bh(&n->tbl->lock); } -static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags, - int *notify) +static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, + bool *gc_update, bool *managed_update) { - bool rc = false; - u8 ndm_flags; + u32 ndm_flags, old_flags = neigh->flags; if (!(flags & NEIGH_UPDATE_F_ADMIN)) - return rc; + return; + + ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; + ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0; - ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; - if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { + if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) { if (ndm_flags & NTF_EXT_LEARNED) neigh->flags |= NTF_EXT_LEARNED; else neigh->flags &= ~NTF_EXT_LEARNED; - rc = true; *notify = 1; + *gc_update = true; + } + if ((old_flags ^ ndm_flags) & NTF_MANAGED) { + if (ndm_flags & NTF_MANAGED) + neigh->flags |= NTF_MANAGED; + else + neigh->flags &= ~NTF_MANAGED; + *notify = 1; + *managed_update = true; } - - return rc; } static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, @@ -379,7 +407,7 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, - bool exempt_from_gc) + u32 flags, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -412,6 +440,7 @@ do_alloc: n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; + n->flags = flags; seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); timer_setup(&n->timer, neigh_timer_handler, 0); @@ -421,6 +450,7 @@ do_alloc: refcount_set(&n->refcnt, 1); n->dead = 1; INIT_LIST_HEAD(&n->gc_list); + INIT_LIST_HEAD(&n->managed_list); atomic_inc(&tbl->entries); out: @@ -575,19 +605,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); -static struct neighbour *___neigh_create(struct neigh_table *tbl, - const void *pkey, - struct net_device *dev, - bool exempt_from_gc, bool want_ref) +static struct neighbour * +___neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, u32 flags, + bool exempt_from_gc, bool want_ref) { - struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); - u32 hash_val; - unsigned int key_len = tbl->key_len; - int error; + u32 hash_val, key_len = tbl->key_len; + struct neighbour *n1, *rc, *n; struct neigh_hash_table *nht; + int error; + n = neigh_alloc(tbl, dev, flags, exempt_from_gc); trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc); - if (!n) { rc = ERR_PTR(-ENOBUFS); goto out; @@ -650,7 +679,8 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl, n->dead = 0; if (!exempt_from_gc) list_add_tail(&n->gc_list, &n->tbl->gc_list); - + if (n->flags & NTF_MANAGED) + list_add_tail(&n->managed_list, &n->tbl->managed_list); if (want_ref) neigh_hold(n); rcu_assign_pointer(n->next, @@ -674,7 +704,7 @@ out_neigh_release: struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { - return ___neigh_create(tbl, pkey, dev, false, want_ref); + return ___neigh_create(tbl, pkey, dev, 0, false, want_ref); } EXPORT_SYMBOL(__neigh_create); @@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh) } } - - /* Generic update routine. -- lladdr is new lladdr or NULL, if it is not supplied. -- new is new state. @@ -1217,7 +1245,8 @@ static void neigh_update_hhs(struct neighbour *neigh) lladdr instead of overriding it if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. - + NEIGH_UPDATE_F_USE means that the entry is user triggered. + NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as @@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh) Caller MUST hold reference count on the entry. */ - static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid, struct netlink_ext_ack *extack) { - bool ext_learn_change = false; - u8 old; - int err; - int notify = 0; - struct net_device *dev; + bool gc_update = false, managed_update = false; int update_isrouter = 0; + struct net_device *dev; + int err, notify = 0; + u8 old; trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); @@ -1254,7 +1281,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, (old & (NUD_NOARP | NUD_PERMANENT))) goto out; - ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); + neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update); + if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) { + new = old & ~NUD_PERMANENT; + neigh->nud_state = new; + err = 0; + goto out; + } if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1399,15 +1432,13 @@ out: if (update_isrouter) neigh_update_is_router(neigh, flags, ¬ify); write_unlock_bh(&neigh->lock); - - if (((new ^ old) & NUD_PERMANENT) || ext_learn_change) + if (((new ^ old) & NUD_PERMANENT) || gc_update) neigh_update_gc_list(neigh); - + if (managed_update) + neigh_update_managed_list(neigh); if (notify) neigh_update_notify(neigh, nlmsg_pid); - trace_neigh_update_done(neigh, err); - return err; } @@ -1533,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) } EXPORT_SYMBOL(neigh_direct_output); +static void neigh_managed_work(struct work_struct *work) +{ + struct neigh_table *tbl = container_of(work, struct neigh_table, + managed_work.work); + struct neighbour *neigh; + + write_lock_bh(&tbl->lock); + list_for_each_entry(neigh, &tbl->managed_list, managed_list) + neigh_event_send(neigh, NULL); + queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, + NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); + write_unlock_bh(&tbl->lock); +} + static void neigh_proxy_process(struct timer_list *t) { struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); @@ -1679,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl) INIT_LIST_HEAD(&tbl->parms_list); INIT_LIST_HEAD(&tbl->gc_list); + INIT_LIST_HEAD(&tbl->managed_list); + list_add(&tbl->parms.list, &tbl->parms_list); write_pnet(&tbl->parms.net, &init_net); refcount_set(&tbl->parms.refcnt, 1); @@ -1710,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl) WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); rwlock_init(&tbl->lock); + INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, tbl->parms.reachable_time); + INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work); + queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0); + timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1783,6 +1834,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, + [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK), [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; @@ -1855,7 +1907,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_OVERRIDE_ISROUTER; + NEIGH_UPDATE_F_OVERRIDE_ISROUTER; struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; @@ -1864,6 +1916,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct neighbour *neigh; void *dst, *lladdr; u8 protocol = 0; + u32 ndm_flags; int err; ASSERT_RTNL(); @@ -1879,6 +1932,15 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, } ndm = nlmsg_data(nlh); + ndm_flags = ndm->ndm_flags; + if (tb[NDA_FLAGS_EXT]) { + u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]); + + BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE < + (sizeof(ndm->ndm_flags) * BITS_PER_BYTE + + hweight32(NTF_EXT_MASK))); + ndm_flags |= (ext << NTF_EXT_SHIFT); + } if (ndm->ndm_ifindex) { dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { @@ -1906,14 +1968,18 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[NDA_PROTOCOL]) protocol = nla_get_u8(tb[NDA_PROTOCOL]); - - if (ndm->ndm_flags & NTF_PROXY) { + if (ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; + if (ndm_flags & NTF_MANAGED) { + NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination"); + goto out; + } + err = -ENOBUFS; pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { - pn->flags = ndm->ndm_flags; + pn->flags = ndm_flags; if (protocol) pn->protocol = protocol; err = 0; @@ -1933,16 +1999,24 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { - bool exempt_from_gc; + bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT; + bool exempt_from_gc = ndm_permanent || + ndm_flags & NTF_EXT_LEARNED; if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; goto out; } + if (ndm_permanent && (ndm_flags & NTF_MANAGED)) { + NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry"); + err = -EINVAL; + goto out; + } - exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || - ndm->ndm_flags & NTF_EXT_LEARNED; - neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true); + neigh = ___neigh_create(tbl, dst, dev, + ndm_flags & + (NTF_EXT_LEARNED | NTF_MANAGED), + exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out; @@ -1961,22 +2035,22 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (protocol) neigh->protocol = protocol; - - if (ndm->ndm_flags & NTF_EXT_LEARNED) + if (ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; - - if (ndm->ndm_flags & NTF_ROUTER) + if (ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; + if (ndm_flags & NTF_MANAGED) + flags |= NEIGH_UPDATE_F_MANAGED; + if (ndm_flags & NTF_USE) + flags |= NEIGH_UPDATE_F_USE; - if (ndm->ndm_flags & NTF_USE) { + err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, + NETLINK_CB(skb).portid, extack); + if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) { neigh_event_send(neigh, NULL); err = 0; - } else - err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, - NETLINK_CB(skb).portid, extack); - + } neigh_release(neigh); - out: return err; } @@ -2427,6 +2501,7 @@ out: static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, u32 pid, u32 seq, int type, unsigned int flags) { + u32 neigh_flags, neigh_flags_ext; unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; @@ -2436,11 +2511,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, if (nlh == NULL) return -EMSGSIZE; + neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT; + neigh_flags = neigh->flags & NTF_OLD_MASK; + ndm = nlmsg_data(nlh); ndm->ndm_family = neigh->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = neigh->flags; + ndm->ndm_flags = neigh_flags; ndm->ndm_type = neigh->type; ndm->ndm_ifindex = neigh->dev->ifindex; @@ -2471,6 +2549,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol)) goto nla_put_failure; + if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2484,6 +2564,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, u32 pid, u32 seq, int type, unsigned int flags, struct neigh_table *tbl) { + u32 neigh_flags, neigh_flags_ext; struct nlmsghdr *nlh; struct ndmsg *ndm; @@ -2491,11 +2572,14 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nlh == NULL) return -EMSGSIZE; + neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT; + neigh_flags = pn->flags & NTF_OLD_MASK; + ndm = nlmsg_data(nlh); ndm->ndm_family = tbl->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = pn->flags | NTF_PROXY; + ndm->ndm_flags = neigh_flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; @@ -2505,6 +2589,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol)) goto nla_put_failure; + if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2820,6 +2906,7 @@ static inline size_t neigh_nlmsg_size(void) + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + nla_total_size(sizeof(struct nda_cacheinfo)) + nla_total_size(4) /* NDA_PROBES */ + + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } @@ -2848,6 +2935,7 @@ static inline size_t pneigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + + nla_total_size(4) /* NDA_FLAGS_EXT */ + nla_total_size(1); /* NDA_PROTOCOL */ } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f6197774048b..d6e4e0b43beb 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -175,6 +175,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier) static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_carrier; this helps returning early + * without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_carrier) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_carrier); } @@ -196,6 +204,12 @@ static ssize_t speed_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); @@ -216,6 +230,12 @@ static ssize_t duplex_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); @@ -468,6 +488,14 @@ static ssize_t proto_down_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_proto_down; this helps returning + * early without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_proto_down) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_proto_down); } NETDEVICE_SHOW_RW(proto_down, fmt_dec); @@ -478,6 +506,12 @@ static ssize_t phys_port_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The check is also done in dev_get_phys_port_id; this helps returning + * early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_id) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -500,6 +534,13 @@ static ssize_t phys_port_name_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_name && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -522,6 +563,14 @@ static ssize_t phys_switch_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. This works + * because recurse is false when calling dev_get_port_parent_id. + */ + if (!netdev->netdev_ops->ndo_get_port_parent_id && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -1226,6 +1275,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (!capable(CAP_NET_ADMIN)) return -EPERM; + /* The check is also done later; this helps returning early without + * hitting the trylock/restart below. + */ + if (!dev->netdev_ops->ndo_set_tx_maxrate) + return -EOPNOTSUPP; + err = kstrtou32(buf, 10, &rate); if (err < 0) return err; @@ -1869,7 +1924,7 @@ static struct class net_class __ro_after_init = { .get_ownership = net_get_ownership, }; -#ifdef CONFIG_OF_NET +#ifdef CONFIG_OF static int of_dev_node_match(struct device *dev, const void *data) { for (; dev; dev = dev->parent) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a448a9b5bb2d..202fa5eacd0f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -473,7 +473,9 @@ struct net *copy_net_ns(unsigned long flags, if (rv < 0) { put_userns: +#ifdef CONFIG_KEYS key_remove_domain(net->key_domain); +#endif put_user_ns(user_ns); net_free(net); dec_ucounts: @@ -605,7 +607,9 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); dec_net_namespaces(net->ucounts); +#ifdef CONFIG_KEYS key_remove_domain(net->key_domain); +#endif put_user_ns(net->user_ns); net_free(net); } diff --git a/net/core/of_net.c b/net/core/of_net.c new file mode 100644 index 000000000000..f1a9bf7578e7 --- /dev/null +++ b/net/core/of_net.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * OF helpers for network devices. + * + * Initially copied out of arch/powerpc/kernel/prom_parse.c + */ +#include <linux/etherdevice.h> +#include <linux/kernel.h> +#include <linux/of_net.h> +#include <linux/of_platform.h> +#include <linux/phy.h> +#include <linux/export.h> +#include <linux/device.h> +#include <linux/nvmem-consumer.h> + +/** + * of_get_phy_mode - Get phy mode for given device_node + * @np: Pointer to the given device_node + * @interface: Pointer to the result + * + * The function gets phy interface string from property 'phy-mode' or + * 'phy-connection-type'. The index in phy_modes table is set in + * interface and 0 returned. In case of error interface is set to + * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV. + */ +int of_get_phy_mode(struct device_node *np, phy_interface_t *interface) +{ + const char *pm; + int err, i; + + *interface = PHY_INTERFACE_MODE_NA; + + err = of_property_read_string(np, "phy-mode", &pm); + if (err < 0) + err = of_property_read_string(np, "phy-connection-type", &pm); + if (err < 0) + return err; + + for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) + if (!strcasecmp(pm, phy_modes(i))) { + *interface = i; + return 0; + } + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(of_get_phy_mode); + +static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr) +{ + struct property *pp = of_find_property(np, name, NULL); + + if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) { + memcpy(addr, pp->value, ETH_ALEN); + return 0; + } + return -ENODEV; +} + +static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) +{ + struct platform_device *pdev = of_find_device_by_node(np); + struct nvmem_cell *cell; + const void *mac; + size_t len; + int ret; + + /* Try lookup by device first, there might be a nvmem_cell_lookup + * associated with a given device. + */ + if (pdev) { + ret = nvmem_get_mac_address(&pdev->dev, addr); + put_device(&pdev->dev); + return ret; + } + + cell = of_nvmem_cell_get(np, "mac-address"); + if (IS_ERR(cell)) + return PTR_ERR(cell); + + mac = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(mac)) + return PTR_ERR(mac); + + if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { + kfree(mac); + return -EINVAL; + } + + memcpy(addr, mac, ETH_ALEN); + kfree(mac); + + return 0; +} + +/** + * of_get_mac_address() + * @np: Caller's Device Node + * @addr: Pointer to a six-byte array for the result + * + * Search the device tree for the best MAC address to use. 'mac-address' is + * checked first, because that is supposed to contain to "most recent" MAC + * address. If that isn't set, then 'local-mac-address' is checked next, + * because that is the default address. If that isn't set, then the obsolete + * 'address' is checked, just in case we're using an old device tree. If any + * of the above isn't set, then try to get MAC address from nvmem cell named + * 'mac-address'. + * + * Note that the 'address' property is supposed to contain a virtual address of + * the register set, but some DTS files have redefined that property to be the + * MAC address. + * + * All-zero MAC addresses are rejected, because those could be properties that + * exist in the device tree, but were not set by U-Boot. For example, the + * DTS could define 'mac-address' and 'local-mac-address', with zero MAC + * addresses. Some older U-Boots only initialized 'local-mac-address'. In + * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists + * but is all zeros. + * + * Return: 0 on success and errno in case of error. +*/ +int of_get_mac_address(struct device_node *np, u8 *addr) +{ + int ret; + + if (!np) + return -ENODEV; + + ret = of_get_mac_addr(np, "mac-address", addr); + if (!ret) + return 0; + + ret = of_get_mac_addr(np, "local-mac-address", addr); + if (!ret) + return 0; + + ret = of_get_mac_addr(np, "address", addr); + if (!ret) + return 0; + + return of_get_mac_addr_nvmem(np, addr); +} +EXPORT_SYMBOL(of_get_mac_address); + +/** + * of_get_ethdev_address() + * @np: Caller's Device Node + * @dev: Pointer to netdevice which address will be updated + * + * Search the device tree for the best MAC address to use. + * If found set @dev->dev_addr to that address. + * + * See documentation of of_get_mac_address() for more information on how + * the best address is determined. + * + * Return: 0 on success and errno in case of error. + */ +int of_get_ethdev_address(struct device_node *np, struct net_device *dev) +{ + u8 addr[ETH_ALEN]; + int ret; + + ret = of_get_mac_address(np, addr); + if (!ret) + eth_hw_addr_set(dev, addr); + return ret; +} +EXPORT_SYMBOL(of_get_ethdev_address); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 1a6978427d6c..9b60e4301a44 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -49,6 +49,12 @@ static int page_pool_init(struct page_pool *pool, * which is the XDP_TX use-case. */ if (pool->p.flags & PP_FLAG_DMA_MAP) { + /* DMA-mapping is not supported on 32-bit systems with + * 64-bit DMA mapping. + */ + if (sizeof(dma_addr_t) > sizeof(unsigned long)) + return -EOPNOTSUPP; + if ((pool->p.dma_dir != DMA_FROM_DEVICE) && (pool->p.dma_dir != DMA_BIDIRECTIONAL)) return -EINVAL; @@ -69,10 +75,6 @@ static int page_pool_init(struct page_pool *pool, */ } - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && - pool->p.flags & PP_FLAG_PAGE_FRAG) - return -EINVAL; - if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8ccce85562a1..79477dcae7c2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -301,7 +301,7 @@ int rtnl_unregister(int protocol, int msgtype) } link = rtnl_dereference(tab[msgindex]); - rcu_assign_pointer(tab[msgindex], NULL); + RCU_INIT_POINTER(tab[msgindex], NULL); rtnl_unlock(); kfree_rcu(link, rcu); @@ -337,7 +337,7 @@ void rtnl_unregister_all(int protocol) if (!link) continue; - rcu_assign_pointer(tab[msgindex], NULL); + RCU_INIT_POINTER(tab[msgindex], NULL); kfree_rcu(link, rcu); } rtnl_unlock(); @@ -3804,9 +3804,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; - size_t if_info_size; - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags); + skb = nlmsg_new(if_nlmsg_size(dev, 0), flags); if (skb == NULL) goto errout; @@ -4384,7 +4383,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; if (br_dev != netdev_master_upper_dev_get(dev) && - !(dev->priv_flags & IFF_EBRIDGE)) + !netif_is_bridge_master(dev)) continue; cops = ops; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2170bea2c7de..74601bbc56ac 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -134,34 +134,31 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); -static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask) +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); - return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask); -} - -void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) -{ fragsz = SKB_DATA_ALIGN(fragsz); - return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); + return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { - struct page_frag_cache *nc; void *data; fragsz = SKB_DATA_ALIGN(fragsz); if (in_hardirq() || irqs_disabled()) { - nc = this_cpu_ptr(&netdev_alloc_cache); + struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); + data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { + struct napi_alloc_cache *nc; + local_bh_disable(); - data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); + nc = this_cpu_ptr(&napi_alloc_cache); + data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); local_bh_enable(); } return data; @@ -397,8 +394,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, { struct kmem_cache *cache; struct sk_buff *skb; - u8 *data; + unsigned int osize; bool pfmemalloc; + u8 *data; cache = (flags & SKB_ALLOC_FCLONE) ? skbuff_fclone_cache : skbuff_head_cache; @@ -430,7 +428,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. */ - size = SKB_WITH_OVERHEAD(ksize(data)); + osize = ksize(data); + size = SKB_WITH_OVERHEAD(osize); prefetchw(data + size); /* @@ -439,7 +438,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); - __build_skb_around(skb, data, 0); + __build_skb_around(skb, data, osize); skb->pfmemalloc = pfmemalloc; if (flags & SKB_ALLOC_FCLONE) { diff --git a/net/core/sock.c b/net/core/sock.c index c1601f75ec4b..9862eefce21e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -350,7 +350,7 @@ void sk_error_report(struct sock *sk) } EXPORT_SYMBOL(sk_error_report); -static int sock_get_timeout(long timeo, void *optval, bool old_timeval) +int sock_get_timeout(long timeo, void *optval, bool old_timeval) { struct __kernel_sock_timeval tv; @@ -379,12 +379,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) *(struct __kernel_sock_timeval *)optval = tv; return sizeof(tv); } +EXPORT_SYMBOL(sock_get_timeout); -static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, - bool old_timeval) +int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, + sockptr_t optval, int optlen, bool old_timeval) { - struct __kernel_sock_timeval tv; - if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; @@ -393,8 +392,8 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) return -EFAULT; - tv.tv_sec = tv32.tv_sec; - tv.tv_usec = tv32.tv_usec; + tv->tv_sec = tv32.tv_sec; + tv->tv_usec = tv32.tv_usec; } else if (old_timeval) { struct __kernel_old_timeval old_tv; @@ -402,14 +401,28 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, return -EINVAL; if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) return -EFAULT; - tv.tv_sec = old_tv.tv_sec; - tv.tv_usec = old_tv.tv_usec; + tv->tv_sec = old_tv.tv_sec; + tv->tv_usec = old_tv.tv_usec; } else { - if (optlen < sizeof(tv)) + if (optlen < sizeof(*tv)) return -EINVAL; - if (copy_from_sockptr(&tv, optval, sizeof(tv))) + if (copy_from_sockptr(tv, optval, sizeof(*tv))) return -EFAULT; } + + return 0; +} +EXPORT_SYMBOL(sock_copy_user_timeval); + +static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, + bool old_timeval) +{ + struct __kernel_sock_timeval tv; + int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval); + + if (err) + return err; + if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) return -EDOM; @@ -947,6 +960,53 @@ void sock_set_mark(struct sock *sk, u32 val) } EXPORT_SYMBOL(sock_set_mark); +static void sock_release_reserved_memory(struct sock *sk, int bytes) +{ + /* Round down bytes to multiple of pages */ + bytes &= ~(SK_MEM_QUANTUM - 1); + + WARN_ON(bytes > sk->sk_reserved_mem); + sk->sk_reserved_mem -= bytes; + sk_mem_reclaim(sk); +} + +static int sock_reserve_memory(struct sock *sk, int bytes) +{ + long allocated; + bool charged; + int pages; + + if (!mem_cgroup_sockets_enabled || !sk->sk_memcg) + return -EOPNOTSUPP; + + if (!bytes) + return 0; + + pages = sk_mem_pages(bytes); + + /* pre-charge to memcg */ + charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages, + GFP_KERNEL | __GFP_RETRY_MAYFAIL); + if (!charged) + return -ENOMEM; + + /* pre-charge to forward_alloc */ + allocated = sk_memory_allocated_add(sk, pages); + /* If the system goes into memory pressure with this + * precharge, give up and return error. + */ + if (allocated > sk_prot_mem_limits(sk, 1)) { + sk_memory_allocated_sub(sk, pages); + mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); + return -ENOMEM; + } + sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT; + + sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT; + + return 0; +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -1367,6 +1427,23 @@ set_sndbuf: ~SOCK_BUF_LOCK_MASK); break; + case SO_RESERVE_MEM: + { + int delta; + + if (val < 0) { + ret = -EINVAL; + break; + } + + delta = val - sk->sk_reserved_mem; + if (delta < 0) + sock_release_reserved_memory(sk, -delta); + else + ret = sock_reserve_memory(sk, delta); + break; + } + default: ret = -ENOPROTOOPT; break; @@ -1750,6 +1827,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; break; + case SO_RESERVE_MEM: + v.val = sk->sk_reserved_mem; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -2063,6 +2144,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; + newsk->sk_reserved_mem = 0; atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; diff --git a/net/core/stream.c b/net/core/stream.c index 4f1d4aa5fb38..06b36c730ce8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -195,14 +195,11 @@ void sk_stream_kill_queues(struct sock *sk) /* First the read buffer. */ __skb_queue_purge(&sk->sk_receive_queue); - /* Next, the error queue. */ - __skb_queue_purge(&sk->sk_error_queue); - /* Next, the write queue. */ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ - sk_mem_reclaim(sk); + sk_mem_reclaim_final(sk); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index c5c1d2b8045e..5183e627468d 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -48,7 +48,7 @@ extern bool dccp_debug; extern struct inet_hashinfo dccp_hashinfo; -extern struct percpu_counter dccp_orphan_count; +DECLARE_PER_CPU(unsigned int, dccp_orphan_count); void dccp_time_wait(struct sock *sk, int state, int timeo); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index abb5c596a817..fc44dadc778b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -42,8 +42,8 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -struct percpu_counter dccp_orphan_count; -EXPORT_SYMBOL_GPL(dccp_orphan_count); +DEFINE_PER_CPU(unsigned int, dccp_orphan_count); +EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count); struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); @@ -1055,7 +1055,7 @@ adjudge_to_death: bh_lock_sock(sk); WARN_ON(sock_owned_by_user(sk)); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(dccp_orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) @@ -1115,13 +1115,10 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > sizeof_field(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); - if (rc) - goto out_fail; inet_hashinfo_init(&dccp_hashinfo); rc = inet_hashinfo2_init_mod(&dccp_hashinfo); if (rc) - goto out_free_percpu; + goto out_fail; rc = -ENOBUFS; dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", @@ -1226,8 +1223,6 @@ out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); out_free_hashinfo2: inet_hashinfo2_free_mod(&dccp_hashinfo); -out_free_percpu: - percpu_counter_destroy(&dccp_orphan_count); out_fail: dccp_hashinfo.bhash = NULL; dccp_hashinfo.ehash = NULL; @@ -1250,7 +1245,6 @@ static void __exit dccp_fini(void) dccp_ackvec_exit(); dccp_sysctl_exit(); inet_hashinfo2_free_mod(&dccp_hashinfo); - percpu_counter_destroy(&dccp_orphan_count); } module_init(dccp_init); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index d8ee15f1c7a9..8cb87b5067ee 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -92,13 +92,6 @@ config NET_DSA_TAG_KSZ Say Y if you want to enable support for tagging frames for the Microchip 8795/9477/9893 families of switches. -config NET_DSA_TAG_RTL4_A - tristate "Tag driver for Realtek 4 byte protocol A tags" - help - Say Y or M if you want to enable support for tagging frames for the - Realtek switches with 4 byte protocol A tags, sich as found in - the Realtek RTL8366RB. - config NET_DSA_TAG_OCELOT tristate "Tag driver for Ocelot family of switches, using NPI port" select PACKING @@ -126,6 +119,19 @@ config NET_DSA_TAG_QCA Say Y or M if you want to enable support for tagging frames for the Qualcomm Atheros QCA8K switches. +config NET_DSA_TAG_RTL4_A + tristate "Tag driver for Realtek 4 byte protocol A tags" + help + Say Y or M if you want to enable support for tagging frames for the + Realtek switches with 4 byte protocol A tags, sich as found in + the Realtek RTL8366RB. + +config NET_DSA_TAG_RTL8_4 + tristate "Tag driver for Realtek 8 byte protocol 4 tags" + help + Say Y or M if you want to enable support for tagging frames for Realtek + switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC. + config NET_DSA_TAG_LAN9303 tristate "Tag driver for SMSC/Microchip LAN9303 family of switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 67ea009f242c..9f75820e7c98 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -10,12 +10,13 @@ obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o -obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o +obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 41f36ad8b0ec..ea5169e671ae 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -280,23 +280,22 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } #ifdef CONFIG_PM_SLEEP -static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) +static bool dsa_port_is_initialized(const struct dsa_port *dp) { - const struct dsa_port *dp = dsa_to_port(ds, p); - return dp->type == DSA_PORT_TYPE_USER && dp->slave; } int dsa_switch_suspend(struct dsa_switch *ds) { - int i, ret = 0; + struct dsa_port *dp; + int ret = 0; /* Suspend slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave); + ret = dsa_slave_suspend(dp->slave); if (ret) return ret; } @@ -310,7 +309,8 @@ EXPORT_SYMBOL_GPL(dsa_switch_suspend); int dsa_switch_resume(struct dsa_switch *ds) { - int i, ret = 0; + struct dsa_port *dp; + int ret = 0; if (ds->ops->resume) ret = ds->ops->resume(ds); @@ -319,11 +319,11 @@ int dsa_switch_resume(struct dsa_switch *ds) return ret; /* Resume slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_resume(dsa_to_port(ds, i)->slave); + ret = dsa_slave_resume(dp->slave); if (ret) return ret; } diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index e9911b18bdbf..f5270114dcb8 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -399,11 +399,8 @@ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) if (!dsa_port_is_cpu(cpu_dp)) continue; - list_for_each_entry(dp, &dst->ports, list) { - /* Prefer a local CPU port */ - if (dp->ds != cpu_dp->ds) - continue; - + /* Prefer a local CPU port */ + dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ if (dp->cpu_dp) continue; @@ -802,17 +799,16 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) { const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; struct dsa_switch_tree *dst = ds->dst; - int port, err; + struct dsa_port *cpu_dp; + int err; if (tag_ops->proto == dst->default_proto) return 0; - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_cpu_port(ds, port)) - continue; - + dsa_switch_for_each_cpu_port(cpu_dp, ds) { rtnl_lock(); - err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); + err = ds->ops->change_tag_protocol(ds, cpu_dp->index, + tag_ops->proto); rtnl_unlock(); if (err) { dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", @@ -850,19 +846,13 @@ static int dsa_switch_setup(struct dsa_switch *ds) dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - err = devlink_register(ds->devlink); - if (err) - goto free_devlink; - /* Setup devlink port instances now, so that the switch * setup() can register regions etc, against the ports */ - list_for_each_entry(dp, &ds->dst->ports, list) { - if (dp->ds == ds) { - err = dsa_port_devlink_setup(dp); - if (err) - goto unregister_devlink_ports; - } + dsa_switch_for_each_port(dp, ds) { + err = dsa_port_devlink_setup(dp); + if (err) + goto unregister_devlink_ports; } err = dsa_switch_register_notifier(ds); @@ -879,8 +869,6 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (err) goto teardown; - devlink_params_publish(ds->devlink); - if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = mdiobus_alloc(); if (!ds->slave_mii_bus) { @@ -896,7 +884,7 @@ static int dsa_switch_setup(struct dsa_switch *ds) } ds->setup = true; - + devlink_register(ds->devlink); return 0; free_slave_mii_bus: @@ -908,14 +896,10 @@ teardown: unregister_notifier: dsa_switch_unregister_notifier(ds); unregister_devlink_ports: - list_for_each_entry(dp, &ds->dst->ports, list) - if (dp->ds == ds) - dsa_port_devlink_teardown(dp); - devlink_unregister(ds->devlink); -free_devlink: + dsa_switch_for_each_port(dp, ds) + dsa_port_devlink_teardown(dp); devlink_free(ds->devlink); ds->devlink = NULL; - return err; } @@ -926,22 +910,23 @@ static void dsa_switch_teardown(struct dsa_switch *ds) if (!ds->setup) return; + if (ds->devlink) + devlink_unregister(ds->devlink); + if (ds->slave_mii_bus && ds->ops->phy_read) { mdiobus_unregister(ds->slave_mii_bus); mdiobus_free(ds->slave_mii_bus); ds->slave_mii_bus = NULL; } - dsa_switch_unregister_notifier(ds); - if (ds->ops->teardown) ds->ops->teardown(ds); + dsa_switch_unregister_notifier(ds); + if (ds->devlink) { - list_for_each_entry(dp, &ds->dst->ports, list) - if (dp->ds == ds) - dsa_port_devlink_teardown(dp); - devlink_unregister(ds->devlink); + dsa_switch_for_each_port(dp, ds) + dsa_port_devlink_teardown(dp); devlink_free(ds->devlink); ds->devlink = NULL; } @@ -1157,7 +1142,7 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, goto out_unlock; list_for_each_entry(dp, &dst->ports, list) { - if (!dsa_is_user_port(dp->ds, dp->index)) + if (!dsa_port_is_user(dp)) continue; if (dp->slave->flags & IFF_UP) @@ -1188,8 +1173,8 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp; - list_for_each_entry(dp, &dst->ports, list) - if (dp->ds == ds && dp->index == index) + dsa_switch_for_each_port(dp, ds) + if (dp->index == index) return dp; dp = kzalloc(sizeof(*dp), GFP_KERNEL); @@ -1535,12 +1520,9 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) static void dsa_switch_release_ports(struct dsa_switch *ds) { - struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp, *next; - list_for_each_entry_safe(dp, next, &dst->ports, list) { - if (dp->ds != ds) - continue; + dsa_switch_for_each_port_safe(dp, next, ds) { list_del(&dp->list); kfree(dp); } @@ -1632,13 +1614,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) mutex_lock(&dsa2_mutex); rtnl_lock(); - list_for_each_entry(dp, &ds->dst->ports, list) { - if (dp->ds != ds) - continue; - - if (!dsa_port_is_user(dp)) - continue; - + dsa_switch_for_each_user_port(dp, ds) { master = dp->cpu_dp->master; slave_dev = dp->slave; diff --git a/net/dsa/port.c b/net/dsa/port.c index 616330a16d31..bf671306b560 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -515,14 +515,15 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; - int err, i; + struct dsa_port *other_dp; + int err; /* VLAN awareness was off, so the question is "can we turn it on". * We may have had 8021q uppers, those need to go. Make sure we don't * enter an inconsistent state: deny changing the VLAN awareness state * as long as we have 8021q uppers. */ - if (vlan_filtering && dsa_is_user_port(ds, dp->index)) { + if (vlan_filtering && dsa_port_is_user(dp)) { struct net_device *upper_dev, *slave = dp->slave; struct net_device *br = dp->bridge_dev; struct list_head *iter; @@ -557,10 +558,10 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, * different ports of the same switch device and one of them has a * different setting than what is being requested. */ - for (i = 0; i < ds->num_ports; i++) { + dsa_switch_for_each_port(other_dp, ds) { struct net_device *other_bridge; - other_bridge = dsa_to_port(ds, i)->bridge_dev; + other_bridge = other_dp->bridge_dev; if (!other_bridge) continue; /* If it's the same bridge, it also has same @@ -607,20 +608,16 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, return err; if (ds->vlan_filtering_is_global) { - int port; + struct dsa_port *other_dp; ds->vlan_filtering = vlan_filtering; - for (port = 0; port < ds->num_ports; port++) { - struct net_device *slave; - - if (!dsa_is_user_port(ds, port)) - continue; + dsa_switch_for_each_user_port(other_dp, ds) { + struct net_device *slave = dp->slave; /* We might be called in the unbind path, so not * all slave devices might still be registered. */ - slave = dsa_to_port(ds, port)->slave; if (!slave) continue; @@ -1041,7 +1038,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, struct phy_device *phydev = NULL; struct dsa_switch *ds = dp->ds; - if (dsa_is_user_port(ds, dp->index)) + if (dsa_port_is_user(dp)) phydev = dp->slave->phydev; if (!ds->ops->phylink_mac_link_down) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a2bf2d8ac65b..9d9fef668eba 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -174,7 +174,7 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a) dev_uc_del(master, dev->dev_addr); out: - ether_addr_copy(dev->dev_addr, addr->sa_data); + eth_hw_addr_set(dev, addr->sa_data); return 0; } @@ -789,6 +789,37 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) return -EOPNOTSUPP; } +static void dsa_slave_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_phy_stats) + ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats); +} + +static void dsa_slave_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_mac_stats) + ds->ops->get_eth_mac_stats(ds, dp->index, mac_stats); +} + +static void +dsa_slave_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_eth_ctrl_stats) + ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats); +} + static void dsa_slave_net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf) { @@ -1695,6 +1726,9 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_strings = dsa_slave_get_strings, .get_ethtool_stats = dsa_slave_get_ethtool_stats, .get_sset_count = dsa_slave_get_sset_count, + .get_eth_phy_stats = dsa_slave_get_eth_phy_stats, + .get_eth_mac_stats = dsa_slave_get_eth_mac_stats, + .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats, .set_wol = dsa_slave_set_wol, .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, @@ -1954,7 +1988,7 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; if (!is_zero_ether_addr(port->mac)) - ether_addr_copy(slave_dev->dev_addr, port->mac); + eth_hw_addr_set(slave_dev, port->mac); else eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; @@ -2334,7 +2368,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, dst = cpu_dp->ds->dst; list_for_each_entry(dp, &dst->ports, list) { - if (!dsa_is_user_port(dp->ds, dp->index)) + if (!dsa_port_is_user(dp)) continue; list_add(&dp->slave->close_list, &close_list); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 6466d0539af9..2b1b21bde830 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -17,14 +17,11 @@ static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) { - int i; - - for (i = 0; i < ds->num_ports; ++i) { - struct dsa_port *dp = dsa_to_port(ds, i); + struct dsa_port *dp; + dsa_switch_for_each_port(dp, ds) if (dp->ageing_time && dp->ageing_time < ageing_time) ageing_time = dp->ageing_time; - } return ageing_time; } @@ -49,10 +46,10 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, return 0; } -static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port, - struct dsa_notifier_mtu_info *info) +static bool dsa_port_mtu_match(struct dsa_port *dp, + struct dsa_notifier_mtu_info *info) { - if (ds->index == info->sw_index && port == info->port) + if (dp->ds->index == info->sw_index && dp->index == info->port) return true; /* Do not propagate to other switches in the tree if the notifier was @@ -61,7 +58,7 @@ static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port, if (info->targeted_match) return false; - if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) return true; return false; @@ -70,14 +67,16 @@ static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port, static int dsa_switch_mtu(struct dsa_switch *ds, struct dsa_notifier_mtu_info *info) { - int port, ret; + struct dsa_port *dp; + int ret; if (!ds->ops->port_change_mtu) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_mtu_match(ds, port, info)) { - ret = ds->ops->port_change_mtu(ds, port, info->mtu); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_mtu_match(dp, info)) { + ret = ds->ops->port_change_mtu(ds, dp->index, + info->mtu); if (ret) return ret; } @@ -120,7 +119,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; bool vlan_filtering; - int err, port; + struct dsa_port *dp; + int err; if (dst->index == info->tree_index && ds->index == info->sw_index && ds->ops->port_bridge_leave) @@ -150,10 +150,10 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * VLAN-aware bridge. */ if (change_vlan_filtering && ds->vlan_filtering_is_global) { - for (port = 0; port < ds->num_ports; port++) { + dsa_switch_for_each_port(dp, ds) { struct net_device *bridge_dev; - bridge_dev = dsa_to_port(ds, port)->bridge_dev; + bridge_dev = dp->bridge_dev; if (bridge_dev && br_vlan_enabled(bridge_dev)) { change_vlan_filtering = false; @@ -179,19 +179,19 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * DSA links) that sit between the targeted port on which the notifier was * emitted and its dedicated CPU port. */ -static bool dsa_switch_host_address_match(struct dsa_switch *ds, int port, - int info_sw_index, int info_port) +static bool dsa_port_host_address_match(struct dsa_port *dp, + int info_sw_index, int info_port) { struct dsa_port *targeted_dp, *cpu_dp; struct dsa_switch *targeted_ds; - targeted_ds = dsa_switch_find(ds->dst->index, info_sw_index); + targeted_ds = dsa_switch_find(dp->ds->dst->index, info_sw_index); targeted_dp = dsa_to_port(targeted_ds, info_port); cpu_dp = targeted_dp->cpu_dp; - if (dsa_switch_is_upstream_of(ds, targeted_ds)) - return port == dsa_towards_port(ds, cpu_dp->ds->index, - cpu_dp->index); + if (dsa_switch_is_upstream_of(dp->ds, targeted_ds)) + return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index, + cpu_dp->index); return false; } @@ -209,11 +209,12 @@ static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list, return NULL; } -static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_port_do_mdb_add(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -244,11 +245,12 @@ static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int dsa_port_do_mdb_del(struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -274,11 +276,12 @@ static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) +static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -309,11 +312,12 @@ static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid) +static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -342,17 +346,16 @@ static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port, static int dsa_switch_host_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_fdb_add(ds, port, info->addr, - info->vid); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_fdb_add(dp, info->addr, info->vid); if (err) break; } @@ -364,17 +367,16 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, static int dsa_switch_host_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_fdb_del(ds, port, info->addr, - info->vid); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_fdb_del(dp, info->addr, info->vid); if (err) break; } @@ -387,22 +389,24 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - return dsa_switch_do_fdb_add(ds, port, info->addr, info->vid); + return dsa_port_do_fdb_add(dp, info->addr, info->vid); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return dsa_switch_do_fdb_del(ds, port, info->addr, info->vid); + return dsa_port_do_fdb_del(dp, info->addr, info->vid); } static int dsa_switch_hsr_join(struct dsa_switch *ds, @@ -468,37 +472,39 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - return dsa_switch_do_mdb_add(ds, port, info->mdb); + return dsa_port_do_mdb_add(dp, info->mdb); } static int dsa_switch_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { int port = dsa_towards_port(ds, info->sw_index, info->port); + struct dsa_port *dp = dsa_to_port(ds, port); if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - return dsa_switch_do_mdb_del(ds, port, info->mdb); + return dsa_port_do_mdb_del(dp, info->mdb); } static int dsa_switch_host_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_mdb_add(ds, port, info->mdb); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_mdb_add(dp, info->mdb); if (err) break; } @@ -510,16 +516,16 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds, static int dsa_switch_host_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { + struct dsa_port *dp; int err = 0; - int port; if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_host_address_match(ds, port, info->sw_index, - info->port)) { - err = dsa_switch_do_mdb_del(ds, port, info->mdb); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_host_address_match(dp, info->sw_index, + info->port)) { + err = dsa_port_do_mdb_del(dp, info->mdb); if (err) break; } @@ -528,13 +534,13 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds, return err; } -static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, - struct dsa_notifier_vlan_info *info) +static bool dsa_port_vlan_match(struct dsa_port *dp, + struct dsa_notifier_vlan_info *info) { - if (ds->index == info->sw_index && port == info->port) + if (dp->ds->index == info->sw_index && dp->index == info->port) return true; - if (dsa_is_dsa_port(ds, port)) + if (dsa_port_is_dsa(dp)) return true; return false; @@ -543,14 +549,15 @@ static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, static int dsa_switch_vlan_add(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { - int port, err; + struct dsa_port *dp; + int err; if (!ds->ops->port_vlan_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_vlan_match(ds, port, info)) { - err = ds->ops->port_vlan_add(ds, port, info->vlan, + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_vlan_match(dp, info)) { + err = ds->ops->port_vlan_add(ds, dp->index, info->vlan, info->extack); if (err) return err; @@ -579,38 +586,34 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds, struct dsa_notifier_tag_proto_info *info) { const struct dsa_device_ops *tag_ops = info->tag_ops; - int port, err; + struct dsa_port *dp, *cpu_dp; + int err; if (!ds->ops->change_tag_protocol) return -EOPNOTSUPP; ASSERT_RTNL(); - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_cpu_port(ds, port)) - continue; - - err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto); + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + err = ds->ops->change_tag_protocol(ds, cpu_dp->index, + tag_ops->proto); if (err) return err; - dsa_port_set_tag_protocol(dsa_to_port(ds, port), tag_ops); + dsa_port_set_tag_protocol(cpu_dp, tag_ops); } /* Now that changing the tag protocol can no longer fail, let's update * the remaining bits which are "duplicated for faster access", and the * bits that depend on the tagger, such as the MTU. */ - for (port = 0; port < ds->num_ports; port++) { - if (dsa_is_user_port(ds, port)) { - struct net_device *slave; + dsa_switch_for_each_user_port(dp, ds) { + struct net_device *slave = dp->slave; - slave = dsa_to_port(ds, port)->slave; - dsa_slave_setup_tagger(slave); + dsa_slave_setup_tagger(slave); - /* rtnl_mutex is held in dsa_tree_change_tag_proto */ - dsa_slave_change_mtu(slave, slave->mtu); - } + /* rtnl_mutex is held in dsa_tree_change_tag_proto */ + dsa_slave_change_mtu(slave, slave->mtu); } return 0; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index f8f7b7c34e7d..72cac2c0af7b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -6,7 +6,6 @@ * dsa_8021q_netdev_ops is registered for API compliance and not used * directly by callers. */ -#include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/dsa/8021q.h> @@ -78,22 +77,22 @@ EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid); /* Returns the VID to be inserted into the frame from xmit for switch steering * instructions on egress. Encodes switch ID and port ID. */ -u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) +u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp) { - return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port); + return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) | + DSA_8021Q_PORT(dp->index); } -EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid); /* Returns the VID that will be installed as pvid for this switch port, sent as * tagged egress towards the CPU port and decoded by the rcv function. */ -u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) +u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp) { - return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | - DSA_8021Q_PORT(port); + return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) | + DSA_8021Q_PORT(dp->index); } -EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); +EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid); /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) @@ -139,12 +138,13 @@ dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid) return NULL; } -static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port, - u16 vid, u16 flags) +static int dsa_port_do_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, + u16 flags) { - struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; + struct dsa_switch *ds = dp->ds; struct dsa_tag_8021q_vlan *v; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -175,12 +175,12 @@ static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port, return 0; } -static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port, - u16 vid) +static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) { - struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_8021q_context *ctx = dp->ds->tag_8021q_ctx; + struct dsa_switch *ds = dp->ds; struct dsa_tag_8021q_vlan *v; + int port = dp->index; int err; /* No need to bother with refcounting for user ports */ @@ -207,14 +207,16 @@ static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port, } static bool -dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port, - struct dsa_notifier_tag_8021q_vlan_info *info) +dsa_port_tag_8021q_vlan_match(struct dsa_port *dp, + struct dsa_notifier_tag_8021q_vlan_info *info) { - if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + struct dsa_switch *ds = dp->ds; + + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) return true; if (ds->dst->index == info->tree_index && ds->index == info->sw_index) - return port == info->port; + return dp->index == info->port; return false; } @@ -222,7 +224,8 @@ dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port, int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info) { - int port, err; + struct dsa_port *dp; + int err; /* Since we use dsa_broadcast(), there might be other switches in other * trees which don't support tag_8021q, so don't return an error. @@ -232,21 +235,20 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx) return 0; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_tag_8021q_vlan_match(dp, info)) { u16 flags = 0; - if (dsa_is_user_port(ds, port)) + if (dsa_port_is_user(dp)) flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (vid_is_dsa_8021q_rxvlan(info->vid) && dsa_8021q_rx_switch_id(info->vid) == ds->index && - dsa_8021q_rx_source_port(info->vid) == port) + dsa_8021q_rx_source_port(info->vid) == dp->index) flags |= BRIDGE_VLAN_INFO_PVID; - err = dsa_switch_do_tag_8021q_vlan_add(ds, port, - info->vid, - flags); + err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid, + flags); if (err) return err; } @@ -258,15 +260,15 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info) { - int port, err; + struct dsa_port *dp; + int err; if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx) return 0; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) { - err = dsa_switch_do_tag_8021q_vlan_del(ds, port, - info->vid); + dsa_switch_for_each_port(dp, ds) { + if (dsa_port_tag_8021q_vlan_match(dp, info)) { + err = dsa_port_do_tag_8021q_vlan_del(dp, info->vid); if (err) return err; } @@ -322,15 +324,14 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 */ -static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port, - struct dsa_notifier_bridge_info *info) +static bool +dsa_port_tag_8021q_bridge_match(struct dsa_port *dp, + struct dsa_notifier_bridge_info *info) { - struct dsa_port *dp = dsa_to_port(ds, port); - /* Don't match on self */ - if (ds->dst->index == info->tree_index && - ds->index == info->sw_index && - port == info->port) + if (dp->ds->dst->index == info->tree_index && + dp->ds->index == info->sw_index && + dp->index == info->port) return false; if (dsa_port_is_user(dp)) @@ -344,21 +345,21 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, { struct dsa_switch *targeted_ds; struct dsa_port *targeted_dp; + struct dsa_port *dp; u16 targeted_rx_vid; - int err, port; + int err; if (!ds->tag_8021q_ctx) return 0; targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); - for (port = 0; port < ds->num_ports; port++) { - struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); + dsa_switch_for_each_port(dp, ds) { + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - if (!dsa_tag_8021q_bridge_match(ds, port, info)) + if (!dsa_port_tag_8021q_bridge_match(dp, info)) continue; /* Install the RX VID of the targeted port in our VLAN table */ @@ -380,21 +381,20 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, { struct dsa_switch *targeted_ds; struct dsa_port *targeted_dp; + struct dsa_port *dp; u16 targeted_rx_vid; - int port; if (!ds->tag_8021q_ctx) return 0; targeted_ds = dsa_switch_find(info->tree_index, info->sw_index); targeted_dp = dsa_to_port(targeted_ds, info->port); - targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port); + targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp); - for (port = 0; port < ds->num_ports; port++) { - struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); + dsa_switch_for_each_port(dp, ds) { + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); - if (!dsa_tag_8021q_bridge_match(ds, port, info)) + if (!dsa_port_tag_8021q_bridge_match(dp, info)) continue; /* Remove the RX VID of the targeted port from our VLAN table */ @@ -433,8 +433,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); - u16 tx_vid = dsa_8021q_tx_vid(ds, port); + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); struct net_device *master; int err; @@ -478,8 +478,8 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) { struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); - u16 rx_vid = dsa_8021q_rx_vid(ds, port); - u16 tx_vid = dsa_8021q_tx_vid(ds, port); + u16 rx_vid = dsa_tag_8021q_rx_vid(dp); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); struct net_device *master; /* The CPU port is implicitly configured by diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index fa1d60d13ad9..3509fc967ca9 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -6,7 +6,6 @@ #include <linux/etherdevice.h> #include <linux/list.h> -#include <linux/slab.h> #include <net/dsa.h> #include "dsa_priv.h" diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 605b51ca6921..cd60b94fc175 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -4,15 +4,52 @@ #include <linux/dsa/ocelot.h> #include "dsa_priv.h" +/* If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone and VLAN-unaware bridge ports. + */ +static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, + u64 *vlan_tci, u64 *tag_type) +{ + struct net_device *br = READ_ONCE(dp->bridge_dev); + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + __skb_vlan_pop(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { struct dsa_port *dp = dsa_slave_to_port(netdev); struct dsa_switch *ds = dp->ds; + u64 vlan_tci, tag_type; void *injection; __be32 *prefix; u32 rew_op = 0; + ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + injection = skb_push(skb, OCELOT_TAG_LEN); prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN); @@ -21,6 +58,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, ocelot_ifh_set_bypass(injection, 1); ocelot_ifh_set_src(injection, ds->num_ports); ocelot_ifh_set_qos_class(injection, skb->priority); + ocelot_ifh_set_vlan_tci(injection, vlan_tci); + ocelot_ifh_set_tag_type(injection, tag_type); rew_op = ocelot_ptp_rew_op(skb); if (rew_op) diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 3412051981d7..a1919ea5e828 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -39,9 +39,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); - u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); struct ethhdr *hdr = eth_hdr(skb); if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest)) diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index f920487ae145..6d928ee3ef7a 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -54,7 +54,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, p = (__be16 *)tag; *p = htons(RTL4_A_ETHERTYPE); - out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT) | (2 << 8); + out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT); /* The lower bits indicate the port number */ out |= BIT(dp->index); diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c new file mode 100644 index 000000000000..02686ad4045d --- /dev/null +++ b/net/dsa/tag_rtl8_4.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handler for Realtek 8 byte switch tags + * + * Copyright (C) 2021 Alvin Å ipraga <alsi@bang-olufsen.dk> + * + * NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence + * named tag_rtl8_4. + * + * This tag header has the following format: + * + * ------------------------------------------- + * | MAC DA | MAC SA | 8 byte tag | Type | ... + * ------------------------------------------- + * _______________/ \______________________________________ + * / \ + * 0 7|8 15 + * |-----------------------------------+-----------------------------------|--- + * | (16-bit) | ^ + * | Realtek EtherType [0x8899] | | + * |-----------------------------------+-----------------------------------| 8 + * | (8-bit) | (8-bit) | + * | Protocol [0x04] | REASON | b + * |-----------------------------------+-----------------------------------| y + * | (1) | (1) | (2) | (1) | (3) | (1) | (1) | (1) | (5) | t + * | FID_EN | X | FID | PRI_EN | PRI | KEEP | X | LEARN_DIS | X | e + * |-----------------------------------+-----------------------------------| s + * | (1) | (15-bit) | | + * | ALLOW | TX/RX | v + * |-----------------------------------+-----------------------------------|--- + * + * With the following field descriptions: + * + * field | description + * ------------+------------- + * Realtek | 0x8899: indicates that this is a proprietary Realtek tag; + * EtherType | note that Realtek uses the same EtherType for + * | other incompatible tag formats (e.g. tag_rtl4_a.c) + * Protocol | 0x04: indicates that this tag conforms to this format + * X | reserved + * ------------+------------- + * REASON | reason for forwarding packet to CPU + * | 0: packet was forwarded or flooded to CPU + * | 80: packet was trapped to CPU + * FID_EN | 1: packet has an FID + * | 0: no FID + * FID | FID of packet (if FID_EN=1) + * PRI_EN | 1: force priority of packet + * | 0: don't force priority + * PRI | priority of packet (if PRI_EN=1) + * KEEP | preserve packet VLAN tag format + * LEARN_DIS | don't learn the source MAC address of the packet + * ALLOW | 1: treat TX/RX field as an allowance port mask, meaning the + * | packet may only be forwarded to ports specified in the + * | mask + * | 0: no allowance port mask, TX/RX field is the forwarding + * | port mask + * TX/RX | TX (switch->CPU): port number the packet was received on + * | RX (CPU->switch): forwarding port mask (if ALLOW=0) + * | allowance port mask (if ALLOW=1) + */ + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/etherdevice.h> + +#include "dsa_priv.h" + +/* Protocols supported: + * + * 0x04 = RTL8365MB DSA protocol + */ + +#define RTL8_4_TAG_LEN 8 + +#define RTL8_4_PROTOCOL GENMASK(15, 8) +#define RTL8_4_PROTOCOL_RTL8365MB 0x04 +#define RTL8_4_REASON GENMASK(7, 0) +#define RTL8_4_REASON_FORWARD 0 +#define RTL8_4_REASON_TRAP 80 + +#define RTL8_4_LEARN_DIS BIT(5) + +#define RTL8_4_TX GENMASK(3, 0) +#define RTL8_4_RX GENMASK(10, 0) + +static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 *tag; + + skb_push(skb, RTL8_4_TAG_LEN); + + dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN); + tag = dsa_etype_header_pos_tx(skb); + + /* Set Realtek EtherType */ + tag[0] = htons(ETH_P_REALTEK); + + /* Set Protocol; zero REASON */ + tag[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB)); + + /* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */ + tag[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1)); + + /* Zero ALLOW; set RX (CPU->switch) forwarding port mask */ + tag[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index))); + + return skb; +} + +static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + __be16 *tag; + u16 etype; + u8 reason; + u8 proto; + u8 port; + + if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN))) + return NULL; + + tag = dsa_etype_header_pos_rx(skb); + + /* Parse Realtek EtherType */ + etype = ntohs(tag[0]); + if (unlikely(etype != ETH_P_REALTEK)) { + dev_warn_ratelimited(&dev->dev, + "non-realtek ethertype 0x%04x\n", etype); + return NULL; + } + + /* Parse Protocol */ + proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag[1])); + if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) { + dev_warn_ratelimited(&dev->dev, + "unknown realtek protocol 0x%02x\n", + proto); + return NULL; + } + + /* Parse REASON */ + reason = FIELD_GET(RTL8_4_REASON, ntohs(tag[1])); + + /* Parse TX (switch->CPU) */ + port = FIELD_GET(RTL8_4_TX, ntohs(tag[3])); + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { + dev_warn_ratelimited(&dev->dev, + "could not find slave for port %d\n", + port); + return NULL; + } + + /* Remove tag and recalculate checksum */ + skb_pull_rcsum(skb, RTL8_4_TAG_LEN); + + dsa_strip_etype_header(skb, RTL8_4_TAG_LEN); + + if (reason != RTL8_4_REASON_TRAP) + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops rtl8_4_netdev_ops = { + .name = "rtl8_4", + .proto = DSA_TAG_PROTO_RTL8_4, + .xmit = rtl8_4_tag_xmit, + .rcv = rtl8_4_tag_rcv, + .needed_headroom = RTL8_4_TAG_LEN, +}; +module_dsa_tag_driver(rtl8_4_netdev_ops); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 2edede9ddac9..262c8833a910 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -158,10 +158,7 @@ static u16 sja1105_xmit_tpid(struct dsa_port *dp) * we're sure about that). It may not be on this port though, so we * need to find it. */ - list_for_each_entry(other_dp, &ds->dst->ports, list) { - if (other_dp->ds != ds) - continue; - + dsa_switch_for_each_port(other_dp, ds) { if (!other_dp->bridge_dev) continue; @@ -238,9 +235,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); - u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); if (skb->offload_fwd_mark) return sja1105_imprecise_xmit(skb, netdev); @@ -266,9 +263,9 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, { struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; struct dsa_port *dp = dsa_slave_to_port(netdev); - u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + u16 tx_vid = dsa_tag_8021q_tx_vid(dp); __be32 *tx_trailer; __be16 *tx_header; int trailer_pos; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 73fce9467467..c7d9e08107cb 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -51,6 +51,7 @@ #include <linux/if_ether.h> #include <linux/of_net.h> #include <linux/pci.h> +#include <linux/property.h> #include <net/dst.h> #include <net/arp.h> #include <net/sock.h> @@ -304,7 +305,7 @@ void eth_commit_mac_addr_change(struct net_device *dev, void *p) { struct sockaddr *addr = p; - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + eth_hw_addr_set(dev, addr->sa_data); } EXPORT_SYMBOL(eth_commit_mac_addr_change); @@ -523,6 +524,26 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) EXPORT_SYMBOL(eth_platform_get_mac_address); /** + * platform_get_ethdev_address - Set netdev's MAC address from a given device + * @dev: Pointer to the device + * @netdev: Pointer to netdev to write the address to + * + * Wrapper around eth_platform_get_mac_address() which writes the address + * directly to netdev->dev_addr. + */ +int platform_get_ethdev_address(struct device *dev, struct net_device *netdev) +{ + u8 addr[ETH_ALEN] __aligned(2); + int ret; + + ret = eth_platform_get_mac_address(dev, addr); + if (!ret) + eth_hw_addr_set(netdev, addr); + return ret; +} +EXPORT_SYMBOL(platform_get_ethdev_address); + +/** * nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named * 'mac-address' associated with given device. * @@ -557,4 +578,81 @@ int nvmem_get_mac_address(struct device *dev, void *addrbuf) return 0; } -EXPORT_SYMBOL(nvmem_get_mac_address); + +static int fwnode_get_mac_addr(struct fwnode_handle *fwnode, + const char *name, char *addr) +{ + int ret; + + ret = fwnode_property_read_u8_array(fwnode, name, addr, ETH_ALEN); + if (ret) + return ret; + + if (!is_valid_ether_addr(addr)) + return -EINVAL; + return 0; +} + +/** + * fwnode_get_mac_address - Get the MAC from the firmware node + * @fwnode: Pointer to the firmware node + * @addr: Address of buffer to store the MAC in + * + * Search the firmware node for the best MAC address to use. 'mac-address' is + * checked first, because that is supposed to contain to "most recent" MAC + * address. If that isn't set, then 'local-mac-address' is checked next, + * because that is the default address. If that isn't set, then the obsolete + * 'address' is checked, just in case we're using an old device tree. + * + * Note that the 'address' property is supposed to contain a virtual address of + * the register set, but some DTS files have redefined that property to be the + * MAC address. + * + * All-zero MAC addresses are rejected, because those could be properties that + * exist in the firmware tables, but were not updated by the firmware. For + * example, the DTS could define 'mac-address' and 'local-mac-address', with + * zero MAC addresses. Some older U-Boots only initialized 'local-mac-address'. + * In this case, the real MAC is in 'local-mac-address', and 'mac-address' + * exists but is all zeros. + */ +int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr) +{ + if (!fwnode_get_mac_addr(fwnode, "mac-address", addr) || + !fwnode_get_mac_addr(fwnode, "local-mac-address", addr) || + !fwnode_get_mac_addr(fwnode, "address", addr)) + return 0; + + return -ENOENT; +} +EXPORT_SYMBOL(fwnode_get_mac_address); + +/** + * device_get_mac_address - Get the MAC for a given device + * @dev: Pointer to the device + * @addr: Address of buffer to store the MAC in + */ +int device_get_mac_address(struct device *dev, char *addr) +{ + return fwnode_get_mac_address(dev_fwnode(dev), addr); +} +EXPORT_SYMBOL(device_get_mac_address); + +/** + * device_get_ethdev_address - Set netdev's MAC address from a given device + * @dev: Pointer to the device + * @netdev: Pointer to netdev to write the address to + * + * Wrapper around device_get_mac_address() which writes the address + * directly to netdev->dev_addr. + */ +int device_get_ethdev_address(struct device *dev, struct net_device *netdev) +{ + u8 addr[ETH_ALEN]; + int ret; + + ret = device_get_mac_address(dev, addr); + if (!ret) + eth_hw_addr_set(netdev, addr); + return ret; +} +EXPORT_SYMBOL(device_get_ethdev_address); diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 0a19470efbfb..b76432e70e6b 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o fec.o eeprom.o stats.o phc_vclocks.o + tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index f2abc3152888..bf6e8c2f9bf7 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -89,7 +89,8 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr) if (copy_to_user(useraddr, &cmd, sizeof(cmd))) return -EFAULT; useraddr += sizeof(cmd); - if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) + if (copy_to_user(useraddr, features, + array_size(copy_size, sizeof(*features)))) return -EFAULT; return 0; @@ -799,7 +800,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, goto out; useraddr += offsetof(struct ethtool_sset_info, data); - if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) + if (copy_to_user(useraddr, info_buf, array_size(idx, sizeof(u32)))) goto out; ret = 0; @@ -1022,7 +1023,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr, { int i; - if (copy_from_user(indir, useraddr, size * sizeof(indir[0]))) + if (copy_from_user(indir, useraddr, array_size(size, sizeof(indir[0])))) return -EFAULT; /* Validate ring indices */ @@ -1537,6 +1538,10 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, ret = getter(dev, &eeprom, data); if (ret) break; + if (!eeprom.len) { + ret = -EIO; + break; + } if (copy_to_user(userbuf, data, eeprom.len)) { ret = -EFAULT; break; @@ -1891,7 +1896,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) if (copy_to_user(useraddr, &test, sizeof(test))) goto out; useraddr += sizeof(test); - if (copy_to_user(useraddr, data, test.len * sizeof(u64))) + if (copy_to_user(useraddr, data, array_size(test.len, sizeof(u64)))) goto out; ret = 0; @@ -1933,7 +1938,8 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) goto out; useraddr += sizeof(gstrings); if (gstrings.len && - copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) + copy_to_user(useraddr, data, + array_size(gstrings.len, ETH_GSTRING_LEN))) goto out; ret = 0; diff --git a/net/ethtool/module.c b/net/ethtool/module.c new file mode 100644 index 000000000000..bc2cef11bbda --- /dev/null +++ b/net/ethtool/module.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/ethtool.h> + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct module_req_info { + struct ethnl_req_info base; +}; + +struct module_reply_data { + struct ethnl_reply_data base; + struct ethtool_module_power_mode_params power; +}; + +#define MODULE_REPDATA(__reply_base) \ + container_of(__reply_base, struct module_reply_data, base) + +/* MODULE_GET */ + +const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1] = { + [ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int module_get_power_mode(struct net_device *dev, + struct module_reply_data *data, + struct netlink_ext_ack *extack) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops->get_module_power_mode) + return 0; + + return ops->get_module_power_mode(dev, &data->power, extack); +} + +static int module_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct module_reply_data *data = MODULE_REPDATA(reply_base); + struct netlink_ext_ack *extack = info ? info->extack : NULL; + struct net_device *dev = reply_base->dev; + int ret; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + ret = module_get_power_mode(dev, data, extack); + if (ret < 0) + goto out_complete; + +out_complete: + ethnl_ops_complete(dev); + return ret; +} + +static int module_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + struct module_reply_data *data = MODULE_REPDATA(reply_base); + int len = 0; + + if (data->power.policy) + len += nla_total_size(sizeof(u8)); /* _MODULE_POWER_MODE_POLICY */ + + if (data->power.mode) + len += nla_total_size(sizeof(u8)); /* _MODULE_POWER_MODE */ + + return len; +} + +static int module_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct module_reply_data *data = MODULE_REPDATA(reply_base); + + if (data->power.policy && + nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE_POLICY, + data->power.policy)) + return -EMSGSIZE; + + if (data->power.mode && + nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE, data->power.mode)) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_module_request_ops = { + .request_cmd = ETHTOOL_MSG_MODULE_GET, + .reply_cmd = ETHTOOL_MSG_MODULE_GET_REPLY, + .hdr_attr = ETHTOOL_A_MODULE_HEADER, + .req_info_size = sizeof(struct module_req_info), + .reply_data_size = sizeof(struct module_reply_data), + + .prepare_data = module_prepare_data, + .reply_size = module_reply_size, + .fill_reply = module_fill_reply, +}; + +/* MODULE_SET */ + +const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1] = { + [ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_MODULE_POWER_MODE_POLICY] = + NLA_POLICY_RANGE(NLA_U8, ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH, + ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO), +}; + +static int module_set_power_mode(struct net_device *dev, struct nlattr **tb, + bool *p_mod, struct netlink_ext_ack *extack) +{ + struct ethtool_module_power_mode_params power = {}; + struct ethtool_module_power_mode_params power_new; + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret; + + if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) + return 0; + + if (!ops->get_module_power_mode || !ops->set_module_power_mode) { + NL_SET_ERR_MSG_ATTR(extack, + tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], + "Setting power mode policy is not supported by this device"); + return -EOPNOTSUPP; + } + + power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]); + ret = ops->get_module_power_mode(dev, &power, extack); + if (ret < 0) + return ret; + + if (power_new.policy == power.policy) + return 0; + *p_mod = true; + + return ops->set_module_power_mode(dev, &power_new, extack); +} + +int ethnl_set_module(struct sk_buff *skb, struct genl_info *info) +{ + struct ethnl_req_info req_info = {}; + struct nlattr **tb = info->attrs; + struct net_device *dev; + bool mod = false; + int ret; + + ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_MODULE_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + + ret = module_set_power_mode(dev, tb, &mod, info->extack); + if (ret < 0) + goto out_ops; + + if (!mod) + goto out_ops; + + ethtool_notify(dev, ETHTOOL_MSG_MODULE_NTF, NULL); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); + dev_put(dev); + return ret; +} diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 1797a0a90019..38b44c0291b1 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -282,6 +282,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_MODULE_EEPROM_GET] = ðnl_module_eeprom_request_ops, [ETHTOOL_MSG_STATS_GET] = ðnl_stats_request_ops, [ETHTOOL_MSG_PHC_VCLOCKS_GET] = ðnl_phc_vclocks_request_ops, + [ETHTOOL_MSG_MODULE_GET] = ðnl_module_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -593,6 +594,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_PAUSE_NTF] = ðnl_pause_request_ops, [ETHTOOL_MSG_EEE_NTF] = ðnl_eee_request_ops, [ETHTOOL_MSG_FEC_NTF] = ðnl_fec_request_ops, + [ETHTOOL_MSG_MODULE_NTF] = ðnl_module_request_ops, }; /* default notification handler */ @@ -686,6 +688,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_PAUSE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -999,6 +1002,22 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_phc_vclocks_get_policy, .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_MODULE_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_module_get_policy, + .maxattr = ARRAY_SIZE(ethnl_module_get_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_MODULE_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_module, + .policy = ethnl_module_set_policy, + .maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index e8987e28036f..836ee7157848 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -337,6 +337,7 @@ extern const struct ethnl_request_ops ethnl_fec_request_ops; extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops; extern const struct ethnl_request_ops ethnl_stats_request_ops; extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops; +extern const struct ethnl_request_ops ethnl_module_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -373,6 +374,8 @@ extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1]; extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1]; extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1]; +extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1]; +extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); @@ -391,6 +394,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_module(struct sk_buff *skb, struct genl_info *info); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 26c32407f029..e00fbb16391f 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -493,7 +493,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], INIT_LIST_HEAD(&hsr->self_node_db); spin_lock_init(&hsr->list_lock); - ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); + eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); /* initialize protocol specific functions */ if (protocol_version == PRP_V1) { diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index f7e284f23b1f..b099c3150150 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -75,7 +75,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port->type == HSR_PT_SLAVE_A) { - ether_addr_copy(master->dev->dev_addr, dev->dev_addr); + eth_hw_addr_set(master->dev, dev->dev_addr); call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev); } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 3297e7fa9945..2cf62718a282 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -157,7 +157,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, lowpan_802154_dev(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ - memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); + __dev_addr_set(ldev, wdev->dev_addr, IEEE802154_ADDR_LEN); /* We need headroom for possible wpan_dev_hard_header call and tailroom * for encryption/fcs handling. The lowpan interface will replace * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1d816a5fd3eb..8eb428387bac 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -133,13 +133,9 @@ void inet_sock_destruct(struct sock *sk) struct inet_sock *inet = inet_sk(sk); __skb_queue_purge(&sk->sk_receive_queue); - if (sk->sk_rx_skb_cache) { - __kfree_skb(sk->sk_rx_skb_cache); - sk->sk_rx_skb_cache = NULL; - } __skb_queue_purge(&sk->sk_error_queue); - sk_mem_reclaim(sk); + sk_mem_reclaim_final(sk); if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { pr_err("Attempt to release TCP socket in state %d %p\n", @@ -1666,12 +1662,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); -u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt) -{ - return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt); -} -EXPORT_SYMBOL_GPL(snmp_get_cpu_field); - unsigned long snmp_fold_field(void __percpu *mib, int offt) { unsigned long res = 0; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 099259fc826a..62d5f99760aa 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -73,7 +73,7 @@ struct cipso_v4_map_cache_entry { static struct cipso_v4_map_cache_bkt *cipso_v4_cache; /* Restricted bitmap (tag #1) flags */ -int cipso_v4_rbm_optfmt = 0; +int cipso_v4_rbm_optfmt; int cipso_v4_rbm_strictvalid = 1; /* diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 4a8550c49202..48f337ccf949 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -9,7 +9,6 @@ #include <linux/types.h> #include <linux/module.h> -#include <linux/ip.h> #include <linux/in.h> #include <net/ip.h> #include <net/sock.h> diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index 0c28bd469a68..0e23ade74493 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -6,7 +6,6 @@ #include <linux/export.h> #include <net/net_namespace.h> #include <net/fib_notifier.h> -#include <net/netns/ipv4.h> #include <net/ip_fib.h> int call_fib4_notifier(struct notifier_block *nb, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f25d02ad4a8a..f7fea3a7c5e6 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1015,7 +1015,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - percpu_counter_dec(sk->sk_prot->orphan_count); + this_cpu_dec(*sk->sk_prot->orphan_count); sock_put(sk); } @@ -1074,7 +1074,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, sock_orphan(child); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bfb522e51346..75737267746f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -598,7 +598,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0fe6c936dc54..2ac2b95c5694 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -986,7 +986,7 @@ static int ipgre_tunnel_init(struct net_device *dev) __gre_tunnel_init(dev); - memcpy(dev->dev_addr, &iph->saddr, 4); + __dev_addr_set(dev, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fe9101d3d69e..5a473319d3a5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -834,7 +834,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p->iph.saddr, 4); + __dev_addr_set(dev, &p->iph.saddr, 4); memcpy(dev->broadcast, &p->iph.daddr, 4); } ip_tunnel_add(itn, t); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index efe25a0172e6..8c2bd1d9ddce 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -425,7 +425,7 @@ static int vti_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; - memcpy(dev->dev_addr, &iph->saddr, 4); + __dev_addr_set(dev, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); dev->flags = IFF_NOARP; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 3aa78ccbec3e..123ea63a04cb 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -380,7 +380,7 @@ static int ipip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); + __dev_addr_set(dev, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); tunnel->tun_hlen = 0; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c53f14b94356..ffc0cab7cf18 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -179,10 +179,11 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) return (void *)entry + entry->next_offset; } -unsigned int arpt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table) +unsigned int arpt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) { + const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 3de78416ec76..78cd5ee24448 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -26,14 +26,6 @@ static const struct xt_table packet_filter = { .priority = NF_IP_PRI_FILTER, }; -/* The work comes in here from netfilter.c */ -static unsigned int -arptable_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return arpt_do_table(skb, state, priv); -} - static struct nf_hook_ops *arpfilter_ops __read_mostly; static int arptable_filter_table_init(struct net *net) @@ -72,7 +64,7 @@ static int __init arptable_filter_init(void) if (ret < 0) return ret; - arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); if (IS_ERR(arpfilter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(arpfilter_ops); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 13acb687c19a..2ed7c58b471a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -222,10 +222,11 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ipt_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table) +ipt_do_table(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) { + const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 0eb0e2ab9bfc..b9062f4552ac 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -28,13 +28,6 @@ static const struct xt_table packet_filter = { .priority = NF_IP_PRI_FILTER, }; -static unsigned int -iptable_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ @@ -90,7 +83,7 @@ static int __init iptable_filter_init(void) if (ret < 0) return ret; - filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); + filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 40417a3f930b..3abb430af9e6 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -34,7 +34,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv) +ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; const struct iphdr *iph; @@ -50,7 +50,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pri daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, state, priv); + ret = ipt_do_table(priv, skb, state); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); @@ -75,8 +75,8 @@ iptable_mangle_hook(void *priv, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) - return ipt_mangle_out(skb, state, priv); - return ipt_do_table(skb, state, priv); + return ipt_mangle_out(priv, skb, state); + return ipt_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 45d7e072e6a5..56f6ecc43451 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -29,34 +29,27 @@ static const struct xt_table nf_nat_ipv4_table = { .af = NFPROTO_IPV4, }; -static unsigned int iptable_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, }, { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, }, { - .hook = iptable_nat_do_chain, + .hook = ipt_do_table, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 8265c6765705..ca5e5b21587c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -32,14 +32,6 @@ static const struct xt_table packet_raw_before_defrag = { .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -iptable_raw_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static struct nf_hook_ops *rawtable_ops __read_mostly; static int iptable_raw_table_init(struct net *net) @@ -90,7 +82,7 @@ static int __init iptable_raw_init(void) if (ret < 0) return ret; - rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); if (IS_ERR(rawtable_ops)) { xt_unregister_template(table); return PTR_ERR(rawtable_ops); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index f519162a2fa5..d885443cb267 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -33,13 +33,6 @@ static const struct xt_table security_table = { .priority = NF_IP_PRI_SECURITY, }; -static unsigned int -iptable_security_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ipt_do_table(skb, state, priv); -} - static struct nf_hook_ops *sectbl_ops __read_mostly; static int iptable_security_table_init(struct net *net) @@ -78,7 +71,7 @@ static int __init iptable_security_init(void) if (ret < 0) return ret; - sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); + sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); if (IS_ERR(sectbl_ops)) { xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b0d3a09dc84e..f30273afb539 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -53,7 +53,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; int orphans, sockets; - orphans = percpu_counter_sum_positive(&tcp_orphan_count); + orphans = tcp_orphan_count_sum(); sockets = proto_sockets_allocated_sum_positive(&tcp_prot); socket_seq_show(seq); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d6899ab5fb39..0b4103b1e622 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -61,15 +61,11 @@ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/module.h> -#include <linux/uaccess.h> #include <linux/bitops.h> -#include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/memblock.h> -#include <linux/string.h> #include <linux/socket.h> -#include <linux/sockios.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> @@ -84,20 +80,17 @@ #include <linux/netfilter_ipv4.h> #include <linux/random.h> #include <linux/rcupdate.h> -#include <linux/times.h> #include <linux/slab.h> #include <linux/jhash.h> #include <net/dst.h> #include <net/dst_metadata.h> #include <net/net_namespace.h> -#include <net/protocol.h> #include <net/ip.h> #include <net/route.h> #include <net/inetpeer.h> #include <net/sock.h> #include <net/ip_fib.h> #include <net/nexthop.h> -#include <net/arp.h> #include <net/tcp.h> #include <net/icmp.h> #include <net/xfrm.h> @@ -109,7 +102,6 @@ #endif #include <net/secure_seq.h> #include <net/ip_tunnels.h> -#include <net/l3mdev.h> #include "fib_lookup.h" diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 33792cf55a79..8696dc343ad2 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -7,8 +7,6 @@ */ #include <linux/tcp.h> -#include <linux/slab.h> -#include <linux/random.h> #include <linux/siphash.h> #include <linux/kernel.h> #include <linux/export.h> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6f1e64d49232..97eb54774924 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -6,25 +6,16 @@ * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] */ -#include <linux/mm.h> -#include <linux/module.h> #include <linux/sysctl.h> -#include <linux/igmp.h> -#include <linux/inetdevice.h> #include <linux/seqlock.h> #include <linux/init.h> #include <linux/slab.h> -#include <linux/nsproxy.h> -#include <linux/swap.h> -#include <net/snmp.h> #include <net/icmp.h> #include <net/ip.h> #include <net/ip_fib.h> -#include <net/route.h> #include <net/tcp.h> #include <net/udp.h> #include <net/cipso_ipv4.h> -#include <net/inet_frag.h> #include <net/ping.h> #include <net/protocol.h> #include <net/netevent.h> @@ -594,18 +585,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &sysctl_fib_sync_mem_min, .extra2 = &sysctl_fib_sync_mem_max, }, - { - .procname = "tcp_rx_skb_cache", - .data = &tcp_rx_skb_cache_key.key, - .mode = 0644, - .proc_handler = proc_do_static_key, - }, - { - .procname = "tcp_tx_skb_cache", - .data = &tcp_tx_skb_cache_key.key, - .mode = 0644, - .proc_handler = proc_do_static_key, - }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e8b48df73c85..c2d9830136d2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -287,8 +287,8 @@ enum { TCP_CMSG_TS = 2 }; -struct percpu_counter tcp_orphan_count; -EXPORT_SYMBOL_GPL(tcp_orphan_count); +DEFINE_PER_CPU(unsigned int, tcp_orphan_count); +EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); @@ -325,11 +325,6 @@ struct tcp_splice_state { unsigned long tcp_memory_pressure __read_mostly; EXPORT_SYMBOL_GPL(tcp_memory_pressure); -DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); -EXPORT_SYMBOL(tcp_rx_skb_cache_key); - -DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); - void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; @@ -647,7 +642,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) } EXPORT_SYMBOL(tcp_ioctl); -static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) +void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; @@ -658,7 +653,7 @@ static inline bool forced_push(const struct tcp_sock *tp) return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } -static void skb_entail(struct sock *sk, struct sk_buff *skb) +void tcp_skb_entail(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -866,18 +861,6 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, { struct sk_buff *skb; - if (likely(!size)) { - skb = sk->sk_tx_skb_cache; - if (skb) { - skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); - sk->sk_tx_skb_cache = NULL; - pskb_trim(skb, 0); - INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); - skb_shinfo(skb)->tx_flags = 0; - memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb)); - return skb; - } - } /* The TCP header must be at least 32-bit aligned. */ size = ALIGN(size, 4); @@ -963,8 +946,8 @@ void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) } } -struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, - struct page *page, int offset, size_t *size) +static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, + struct page *page, int offset, size_t *size) { struct sk_buff *skb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -985,7 +968,7 @@ new_segment: #ifdef CONFIG_TLS_DEVICE skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); #endif - skb_entail(sk, skb); + tcp_skb_entail(sk, skb); copy = size_goal; } @@ -1314,7 +1297,7 @@ new_segment: process_backlog++; skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, skb); + tcp_skb_entail(sk, skb); copy = size_goal; /* All packets are restored as if they have @@ -2690,11 +2673,36 @@ void tcp_shutdown(struct sock *sk, int how) } EXPORT_SYMBOL(tcp_shutdown); +int tcp_orphan_count_sum(void) +{ + int i, total = 0; + + for_each_possible_cpu(i) + total += per_cpu(tcp_orphan_count, i); + + return max(total, 0); +} + +static int tcp_orphan_cache; +static struct timer_list tcp_orphan_timer; +#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100) + +static void tcp_orphan_update(struct timer_list *unused) +{ + WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum()); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); +} + +static bool tcp_too_many_orphans(int shift) +{ + return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; +} + bool tcp_check_oom(struct sock *sk, int shift) { bool too_many_orphans, out_of_socket_memory; - too_many_orphans = tcp_too_many_orphans(sk, shift); + too_many_orphans = tcp_too_many_orphans(shift); out_of_socket_memory = tcp_out_of_memory(sk); if (too_many_orphans) @@ -2803,7 +2811,7 @@ adjudge_to_death: /* remove backlog if any, without releasing ownership. */ __release_sock(sk); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(tcp_orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -2920,11 +2928,6 @@ void tcp_write_queue_purge(struct sock *sk) sk_wmem_free_skb(sk, skb); } tcp_rtx_queue_purge(sk); - skb = sk->sk_tx_skb_cache; - if (skb) { - __kfree_skb(skb); - sk->sk_tx_skb_cache = NULL; - } INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); @@ -2961,10 +2964,6 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); - if (sk->sk_rx_skb_cache) { - __kfree_skb(sk->sk_rx_skb_cache); - sk->sk_rx_skb_cache = NULL; - } WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->urg_data = 0; tcp_write_queue_purge(sk); @@ -4505,7 +4504,10 @@ void __init tcp_init(void) sizeof_field(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); - percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); + + timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); + inet_hashinfo_init(&tcp_hashinfo); inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", thash_entries, 21, /* one slot per 2 MB*/ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 59412d6354a0..fdbcf2a6d08e 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -1,13 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/crypto.h> -#include <linux/err.h> -#include <linux/init.h> #include <linux/kernel.h> -#include <linux/list.h> #include <linux/tcp.h> #include <linux/rcupdate.h> -#include <linux/rculist.h> -#include <net/inetpeer.h> #include <net/tcp.h> void tcp_fastopen_init_key_once(struct net *net) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 141e85e6422b..246ab7b5e857 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -500,8 +500,11 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; + if (room <= 0) + return; + /* Check #1 */ - if (room > 0 && !tcp_under_memory_pressure(sk)) { + if (!tcp_under_memory_pressure(sk)) { unsigned int truesize = truesize_adjust(adjust, skb); int incr; @@ -518,6 +521,11 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } + } else { + /* Under pressure: + * Adjust rcv_ssthresh according to reserved mem + */ + tcp_adjust_rcv_ssthresh(sk); } } @@ -3221,7 +3229,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, long seq_rtt_us = -1L; long ca_rtt_us = -1L; u32 pkts_acked = 0; - u32 last_in_flight = 0; bool rtt_update; int flag = 0; @@ -3257,7 +3264,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, if (!first_ackt) first_ackt = last_ackt; - last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; if (before(start_seq, reord)) reord = start_seq; if (!after(scb->end_seq, tp->high_seq)) @@ -3323,8 +3329,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); - if (pkts_acked == 1 && last_in_flight < tp->mss_cache && - last_in_flight && !prior_sacked && fully_acked && + if (pkts_acked == 1 && fully_acked && !prior_sacked && + (tp->snd_una - prior_snd_una) < tp->mss_cache && sack->rate->prior_delivered + 1 == tp->delivered && !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) { /* Conservatively mark a delayed ACK. It's typically @@ -3381,9 +3387,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, if (icsk->icsk_ca_ops->pkts_acked) { struct ack_sample sample = { .pkts_acked = pkts_acked, - .rtt_us = sack->rate->rtt_us, - .in_flight = last_in_flight }; + .rtt_us = sack->rate->rtt_us }; + sample.in_flight = tp->mss_cache * + (tp->delivered - sack->rate->prior_delivered); icsk->icsk_ca_ops->pkts_acked(sk, &sample); } @@ -5346,7 +5353,7 @@ static int tcp_prune_queue(struct sock *sk) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); else if (tcp_under_memory_pressure(sk)) - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + tcp_adjust_rcv_ssthresh(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@ -5381,7 +5388,7 @@ static int tcp_prune_queue(struct sock *sk) return -1; } -static bool tcp_should_expand_sndbuf(const struct sock *sk) +static bool tcp_should_expand_sndbuf(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -5392,8 +5399,18 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we are under global TCP memory pressure, do not expand. */ - if (tcp_under_memory_pressure(sk)) + if (tcp_under_memory_pressure(sk)) { + int unused_mem = sk_unused_reserved_mem(sk); + + /* Adjust sndbuf according to reserved mem. But make sure + * it never goes below SOCK_MIN_SNDBUF. + * See sk_stream_moderate_sndbuf() for more details. + */ + if (unused_mem > SOCK_MIN_SNDBUF) + WRITE_ONCE(sk->sk_sndbuf, unused_mem); + return false; + } /* If we are under soft global TCP memory pressure, do not expand. */ if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5b8ce65dfc06..5e6d8cb82ce5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1960,7 +1960,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - struct sk_buff *skb_to_free; int sdif = inet_sdif(skb); int dif = inet_iif(skb); const struct iphdr *iph; @@ -2101,17 +2100,12 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - skb_to_free = sk->sk_rx_skb_cache; - sk->sk_rx_skb_cache = NULL; ret = tcp_v4_do_rcv(sk, skb); } else { if (tcp_add_backlog(sk, skb)) goto discard_and_relse; - skb_to_free = NULL; } bh_unlock_sock(sk); - if (skb_to_free) - __kfree_skb(skb_to_free); put_and_return: if (refcounted) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0a4f3f16140a..cf913a66df17 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -19,14 +19,7 @@ * Jorge Cwik, <jorge@laser.satlink.net> */ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/sysctl.h> -#include <linux/workqueue.h> -#include <linux/static_key.h> #include <net/tcp.h> -#include <net/inet_common.h> #include <net/xfrm.h> #include <net/busy_poll.h> diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 95db7a11ba2a..ab552356bdba 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -25,7 +25,6 @@ * 1) Add mechanism to deal with reverse congestion. */ -#include <linux/mm.h> #include <linux/module.h> #include <linux/math64.h> #include <net/tcp.h> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6d72f3ea48c4..3a01e5593a17 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); skb->skb_mstamp_ns = tp->tcp_wstamp_ns; if (clone_it) { - TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - - tp->snd_una; oskb = skb; tcp_skb_tsorted_save(oskb) { @@ -2969,8 +2967,7 @@ u32 __tcp_select_window(struct sock *sk) icsk->icsk_ack.quick = 0; if (tcp_under_memory_pressure(sk)) - tp->rcv_ssthresh = min(tp->rcv_ssthresh, - 4U * tp->advmss); + tcp_adjust_rcv_ssthresh(sk); /* free_space might become our new window, make sure we don't * increase it due to wscale. diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 0de693565963..fbab921670cc 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -65,6 +65,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; + TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; } @@ -86,6 +87,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, if (!rs->prior_delivered || after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; @@ -138,6 +140,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, } rs->delivered = tp->delivered - rs->prior_delivered; + rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; + /* delivered_ce occupies less than 32 bits in the skb control block */ + rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK; + /* Model sending data and receiving ACKs as separate pipeline phases * for a window. Usually the ACK phase is longer, but with ACK * compression the send phase can be longer. To be safe we use the diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index b97e3635acf5..8efaf8c3fe2a 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -2,11 +2,8 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/socket.h> -#include <linux/udp.h> -#include <linux/types.h> #include <linux/kernel.h> #include <net/dst_metadata.h> -#include <net/net_namespace.h> #include <net/udp.h> #include <net/udp_tunnel.h> diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e504204bca92..bf2e5e5fe142 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -332,10 +332,10 @@ config IPV6_IOAM6_LWTUNNEL bool "IPv6: IOAM Pre-allocated Trace insertion support" depends on IPV6 select LWTUNNEL + select DST_CACHE help - Support for the inline insertion of IOAM Pre-allocated - Trace Header (only on locally generated packets), using - the lightweight tunnels mechanism. + Support for the insertion of IOAM Pre-allocated Trace + Header using the lightweight tunnels mechanism. If unsure, say N. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 1bc7e143217b..3036a45e8a1e 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -5,16 +5,14 @@ obj-$(CONFIG_IPV6) += ipv6.o -ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ +ipv6-y := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o -ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o - -ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o +ipv6-$(CONFIG_SYSCTL) += sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ @@ -29,8 +27,6 @@ ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o -ipv6-objs += $(ipv6-y) - obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o @@ -48,7 +44,8 @@ obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-$(CONFIG_IPV6_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o -obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) +obj-$(CONFIG_INET) += output_core.o protocol.o \ + ip6_offload.o tcpv6_offload.o exthdrs_offload.o obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c6a90b7bbb70..d4fae16deec4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2237,12 +2237,12 @@ static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev) static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) { - union fwnet_hwaddr *ha; + const union fwnet_hwaddr *ha; if (dev->addr_len != FWNET_ALEN) return -1; - ha = (union fwnet_hwaddr *)dev->dev_addr; + ha = (const union fwnet_hwaddr *)dev->dev_addr; memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); eui[0] ^= 2; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3a871a09f962..38ece3b7b839 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -979,7 +979,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); - ioam6_fill_trace_data(skb, ns, trace); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: break; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index a1ac0e3d8c60..47447f0241df 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -610,7 +610,11 @@ int ila_xlat_init_net(struct net *net) if (err) return err; - rhashtable_init(&ilan->xlat.rhash_table, &rht_params); + err = rhashtable_init(&ilan->xlat.rhash_table, &rht_params); + if (err) { + free_bucket_spinlocks(ilan->xlat.locks); + return err; + } return 0; } diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index d128172bb549..122a3d47424c 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -631,7 +631,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, struct ioam6_schema *sc, - u8 sclen) + u8 sclen, bool is_input) { struct __kernel_sock_timeval ts; u64 raw64; @@ -645,7 +645,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* hop_lim and node_id */ if (trace->type.bit0) { byte = ipv6_hdr(skb)->hop_limit; - if (skb->dev) + if (is_input) byte--; raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; @@ -730,7 +730,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* hop_lim and node_id (wide) */ if (trace->type.bit8) { byte = ipv6_hdr(skb)->hop_limit; - if (skb->dev) + if (is_input) byte--; raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; @@ -846,7 +846,8 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, /* called with rcu_read_lock() */ void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, - struct ioam6_trace_hdr *trace) + struct ioam6_trace_hdr *trace, + bool is_input) { struct ioam6_schema *sc; u8 sclen = 0; @@ -876,7 +877,7 @@ void ioam6_fill_trace_data(struct sk_buff *skb, return; } - __ioam6_fill_trace_data(skb, ns, trace, sc, sclen); + __ioam6_fill_trace_data(skb, ns, trace, sc, sclen, is_input); trace->remlen -= trace->nodelen + sclen; } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 9b7b726f8f45..f90a87389fcc 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/net.h> -#include <linux/netlink.h> #include <linux/in6.h> #include <linux/ioam6.h> #include <linux/ioam6_iptunnel.h> @@ -17,18 +16,26 @@ #include <net/sock.h> #include <net/lwtunnel.h> #include <net/ioam6.h> +#include <net/netlink.h> +#include <net/ipv6.h> +#include <net/dst_cache.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> #define IOAM6_MASK_SHORT_FIELDS 0xff100000 #define IOAM6_MASK_WIDE_FIELDS 0xe00000 struct ioam6_lwt_encap { - struct ipv6_hopopt_hdr eh; - u8 pad[2]; /* 2-octet padding for 4n-alignment */ - struct ioam6_hdr ioamh; - struct ioam6_trace_hdr traceh; + struct ipv6_hopopt_hdr eh; + u8 pad[2]; /* 2-octet padding for 4n-alignment */ + struct ioam6_hdr ioamh; + struct ioam6_trace_hdr traceh; } __packed; struct ioam6_lwt { + struct dst_cache cache; + u8 mode; + struct in6_addr tundst; struct ioam6_lwt_encap tuninfo; }; @@ -42,34 +49,19 @@ static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) return &ioam6_lwt_state(lwt)->tuninfo; } -static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) +static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt) { return &(ioam6_lwt_state(lwt)->tuninfo.traceh); } static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8, + IOAM6_IPTUNNEL_MODE_MIN, + IOAM6_IPTUNNEL_MODE_MAX), + [IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), }; -static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, - struct ioam6_trace_hdr *trace) -{ - struct ioam6_trace_hdr *data; - struct nlattr *nla; - int len; - - len = sizeof(*trace); - - nla = nla_reserve(skb, attrtype, len); - if (!nla) - return -EMSGSIZE; - - data = nla_data(nla); - memcpy(data, trace, len); - - return 0; -} - static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) { u32 fields; @@ -101,9 +93,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; struct ioam6_lwt_encap *tuninfo; struct ioam6_trace_hdr *trace; - struct lwtunnel_state *s; - int len_aligned; - int len, err; + struct lwtunnel_state *lwt; + struct ioam6_lwt *ilwt; + int len_aligned, err; + u8 mode; if (family != AF_INET6) return -EINVAL; @@ -113,6 +106,16 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, if (err < 0) return err; + if (!tb[IOAM6_IPTUNNEL_MODE]) + mode = IOAM6_IPTUNNEL_MODE_INLINE; + else + mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]); + + if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) { + NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination"); + return -EINVAL; + } + if (!tb[IOAM6_IPTUNNEL_TRACE]) { NL_SET_ERR_MSG(extack, "missing trace"); return -EINVAL; @@ -125,15 +128,24 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, return -EINVAL; } - len = sizeof(*tuninfo) + trace->remlen * 4; - len_aligned = ALIGN(len, 8); - - s = lwtunnel_state_alloc(len_aligned); - if (!s) + len_aligned = ALIGN(trace->remlen * 4, 8); + lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned); + if (!lwt) return -ENOMEM; - tuninfo = ioam6_lwt_info(s); - tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; + ilwt = ioam6_lwt_state(lwt); + err = dst_cache_init(&ilwt->cache, GFP_ATOMIC); + if (err) { + kfree(lwt); + return err; + } + + ilwt->mode = mode; + if (tb[IOAM6_IPTUNNEL_DST]) + ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]); + + tuninfo = ioam6_lwt_info(lwt); + tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1; tuninfo->pad[0] = IPV6_TLV_PADN; tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; @@ -142,27 +154,39 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, memcpy(&tuninfo->traceh, trace, sizeof(*trace)); - len = len_aligned - len; - if (len == 1) { - tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; - } else if (len > 0) { + if (len_aligned - trace->remlen * 4) { tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; - tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; + tuninfo->traceh.data[trace->remlen * 4 + 1] = 2; } - s->type = LWTUNNEL_ENCAP_IOAM6; - s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + lwt->type = LWTUNNEL_ENCAP_IOAM6; + lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; - *ts = s; + *ts = lwt; return 0; } -static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) +static int ioam6_do_fill(struct net *net, struct sk_buff *skb) { struct ioam6_trace_hdr *trace; - struct ipv6hdr *oldhdr, *hdr; struct ioam6_namespace *ns; + + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) + + sizeof(struct ipv6_hopopt_hdr) + 2 + + sizeof(struct ioam6_hdr)); + + ns = ioam6_namespace(net, trace->namespace_id); + if (ns) + ioam6_fill_trace_data(skb, ns, trace, false); + + return 0; +} + +static int ioam6_do_inline(struct net *net, struct sk_buff *skb, + struct ioam6_lwt_encap *tuninfo) +{ + struct ipv6hdr *oldhdr, *hdr; int hdrlen, err; hdrlen = (tuninfo->eh.hdrlen + 1) << 3; @@ -191,80 +215,200 @@ static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) hdr->nexthdr = NEXTHDR_HOP; hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); - trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) - + sizeof(struct ipv6_hopopt_hdr) + 2 - + sizeof(struct ioam6_hdr)); + return ioam6_do_fill(net, skb); +} - ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); - if (ns) - ioam6_fill_trace_data(skb, ns, trace); +static int ioam6_do_encap(struct net *net, struct sk_buff *skb, + struct ioam6_lwt_encap *tuninfo, + struct in6_addr *tundst) +{ + struct dst_entry *dst = skb_dst(skb); + struct ipv6hdr *hdr, *inner_hdr; + int hdrlen, len, err; - return 0; + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; + len = sizeof(*hdr) + hdrlen; + + err = skb_cow_head(skb, len + skb->mac_len); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + skb_set_transport_header(skb, sizeof(*hdr)); + + tuninfo->eh.nexthdr = NEXTHDR_IPV6; + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); + + hdr = ipv6_hdr(skb); + memcpy(hdr, inner_hdr, sizeof(*hdr)); + + hdr->nexthdr = NEXTHDR_HOP; + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); + hdr->daddr = *tundst; + ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr, + IPV6_PREFER_SRC_PUBLIC, &hdr->saddr); + + skb_postpush_rcsum(skb, hdr, len); + + return ioam6_do_fill(net, skb); } static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; + struct dst_entry *dst = skb_dst(skb); + struct in6_addr orig_daddr; + struct ioam6_lwt *ilwt; int err = -EINVAL; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - /* Only for packets we send and - * that do not contain a Hop-by-Hop yet - */ - if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) - goto out; - - err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); - if (unlikely(err)) + ilwt = ioam6_lwt_state(dst->lwtstate); + orig_daddr = ipv6_hdr(skb)->daddr; + + switch (ilwt->mode) { + case IOAM6_IPTUNNEL_MODE_INLINE: +do_inline: + /* Direct insertion - if there is no Hop-by-Hop yet */ + if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) + goto out; + + err = ioam6_do_inline(net, skb, &ilwt->tuninfo); + if (unlikely(err)) + goto drop; + + break; + case IOAM6_IPTUNNEL_MODE_ENCAP: +do_encap: + /* Encapsulation (ip6ip6) */ + err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst); + if (unlikely(err)) + goto drop; + + break; + case IOAM6_IPTUNNEL_MODE_AUTO: + /* Automatic (RFC8200 compliant): + * - local packets -> INLINE mode + * - in-transit packets -> ENCAP mode + */ + if (!skb->dev) + goto do_inline; + + goto do_encap; + default: goto drop; + } - err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) goto drop; + if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { + preempt_disable(); + dst = dst_cache_get(&ilwt->cache); + preempt_enable(); + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + + preempt_disable(); + dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); + preempt_enable(); + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); + } out: - return lwt->orig_output(net, sk, skb); - + return dst->lwtstate->orig_output(net, sk, skb); drop: kfree_skb(skb); return err; } +static void ioam6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&ioam6_lwt_state(lwt)->cache); +} + static int ioam6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { - struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); + int err; - if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) - return -EMSGSIZE; + err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode); + if (err) + goto ret; - return 0; + if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) { + err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst); + if (err) + goto ret; + } + + err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh), + &ilwt->tuninfo.traceh); +ret: + return err; } static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) { - struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); + int nlsize; + + nlsize = nla_total_size(sizeof(ilwt->mode)) + + nla_total_size(sizeof(ilwt->tuninfo.traceh)); - return nla_total_size(sizeof(*trace)); + if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) + nlsize += nla_total_size(sizeof(ilwt->tundst)); + + return nlsize; } static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) { - struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); - struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); - - return (a_hdr->namespace_id != b_hdr->namespace_id); + struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a); + struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b); + struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a); + struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b); + + return (ilwt_a->mode != ilwt_b->mode || + (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE && + !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) || + trace_a->namespace_id != trace_b->namespace_id); } static const struct lwtunnel_encap_ops ioam6_iptun_ops = { - .build_state = ioam6_build_state, + .build_state = ioam6_build_state, + .destroy_state = ioam6_destroy_state, .output = ioam6_output, - .fill_encap = ioam6_fill_encap_info, + .fill_encap = ioam6_fill_encap_info, .get_encap_size = ioam6_encap_nlsize, - .cmp_encap = ioam6_encap_cmp, - .owner = THIS_MODULE, + .cmp_encap = ioam6_encap_cmp, + .owner = THIS_MODULE, }; int __init ioam6_iptunnel_init(void) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3ad201d372d8..d831d2439693 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1088,7 +1088,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) struct flowi6 *fl6 = &t->fl.u.ip6; if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); } @@ -1521,7 +1521,7 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (tunnel->parms.collect_md) return 0; - memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); if (ipv6_addr_any(&tunnel->parms.raddr)) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 20a67efda47f..484aca492cc0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1449,7 +1449,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) unsigned int mtu; int t_hlen; - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1d8e3ffa225d..527e9ead7449 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -660,7 +660,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu) struct net_device *tdev = NULL; int mtu; - memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + __dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4b098521a44c..184190b9ea25 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -142,7 +142,7 @@ struct neigh_table nd_tbl = { }; EXPORT_SYMBOL_GPL(nd_tbl); -void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad) { int space = __ndisc_opt_addr_space(data_len, pad); @@ -165,7 +165,7 @@ void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, - void *data, u8 icmp6_type) + const void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a579ea14a69b..2d816277f2c5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -247,10 +247,10 @@ ip6t_next_entry(const struct ip6t_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ip6t_do_table(struct sk_buff *skb, - const struct nf_hook_state *state, - struct xt_table *table) +ip6t_do_table(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { + const struct xt_table *table = priv; unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 727ee8097012..df785ebda0ca 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -27,14 +27,6 @@ static const struct xt_table packet_filter = { .priority = NF_IP6_PRI_FILTER, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ip6table_filter_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ @@ -90,7 +82,7 @@ static int __init ip6table_filter_init(void) if (ret < 0) return ret; - filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 9b518ce37d6a..a88b2ce4a3cb 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -29,7 +29,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv) +ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; struct in6_addr saddr, daddr; @@ -46,7 +46,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pr /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, state, priv); + ret = ip6t_do_table(priv, skb, state); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -68,8 +68,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) - return ip6t_mangle_out(skb, state, priv); - return ip6t_do_table(skb, state, priv); + return ip6t_mangle_out(priv, skb, state); + return ip6t_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 921c1723a01e..bf3cb3a13600 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -31,34 +31,27 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int ip6table_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, }, { - .hook = ip6table_nat_do_chain, + .hook = ip6t_do_table, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 4f2a04af71d3..08861d5d1f4d 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -31,14 +31,6 @@ static const struct xt_table packet_raw_before_defrag = { .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ip6table_raw_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *rawtable_ops __read_mostly; static int ip6table_raw_table_init(struct net *net) @@ -88,7 +80,7 @@ static int __init ip6table_raw_init(void) return ret; /* Register hooks */ - rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); + rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); if (IS_ERR(rawtable_ops)) { xt_unregister_template(table); return PTR_ERR(rawtable_ops); diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 931674034d8b..4df14a9bae78 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -32,13 +32,6 @@ static const struct xt_table security_table = { .priority = NF_IP6_PRI_SECURITY, }; -static unsigned int -ip6table_security_hook(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip6t_do_table(skb, state, priv); -} - static struct nf_hook_ops *sectbl_ops __read_mostly; static int ip6table_security_table_init(struct net *net) @@ -77,7 +70,7 @@ static int __init ip6table_security_init(void) if (ret < 0) return ret; - sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); if (IS_ERR(sectbl_ops)) { xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index e412817fba2f..5daa1c3ed83b 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -374,7 +374,11 @@ static int __net_init seg6_net_init(struct net *net) net->ipv6.seg6_data = sdata; #ifdef CONFIG_IPV6_SEG6_HMAC - seg6_hmac_net_init(net); + if (seg6_hmac_net_init(net)) { + kfree(rcu_dereference_raw(sdata->tun_src)); + kfree(sdata); + return -ENOMEM; + }; #endif return 0; @@ -388,7 +392,7 @@ static void __net_exit seg6_net_exit(struct net *net) seg6_hmac_net_exit(net); #endif - kfree(sdata->tun_src); + kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 687d95dce085..29bc4e7c3046 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -405,9 +405,7 @@ int __net_init seg6_hmac_net_init(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); - rhashtable_init(&sdata->hmac_infos, &rht_params); - - return 0; + return rhashtable_init(&sdata->hmac_infos, &rht_params); } EXPORT_SYMBOL(seg6_hmac_net_init); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ef0c7a7c18e2..1b57ee36d668 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -204,7 +204,7 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + __dev_addr_set(dev, &t->parms.iph.saddr, 4); memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) @@ -1149,7 +1149,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, synchronize_net(); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; - memcpy(t->dev->dev_addr, &p->iph.saddr, 4); + __dev_addr_set(t->dev, &p->iph.saddr, 4); memcpy(t->dev->broadcast, &p->iph.daddr, 4); ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b03dd02c9f13..1ef27cd7dbff 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1621,7 +1621,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { - struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; @@ -1757,17 +1756,12 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - skb_to_free = sk->sk_rx_skb_cache; - sk->sk_rx_skb_cache = NULL; ret = tcp_v6_do_rcv(sk, skb); } else { if (tcp_add_backlog(sk, skb)) goto discard_and_relse; - skb_to_free = NULL; } bh_unlock_sock(sk); - if (skb_to_free) - __kfree_skb(skb_to_free); put_and_return: if (refcounted) sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 647c0554d04c..40ca3c1e42a2 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -781,7 +781,7 @@ int llc_conn_ac_send_sabme_cmd_p_set_x(struct sock *sk, struct sk_buff *skb) if (nskb) { struct llc_sap *sap = llc->sap; - u8 *dmac = llc->daddr.mac; + const u8 *dmac = llc->daddr.mac; if (llc->dev->flags & IFF_LOOPBACK) dmac = llc->dev->dev_addr; diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index ad6547736c21..dde9bf08a593 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -80,7 +80,7 @@ out_free: * establishment will inform to upper layer via calling it's confirm * function and passing proper information. */ -int llc_establish_connection(struct sock *sk, u8 *lmac, u8 *dmac, u8 dsap) +int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap) { int rc = -EISCONN; struct llc_addr laddr, daddr; diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index b9ad087bcbd7..5a6466fc626a 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -56,7 +56,7 @@ int llc_mac_hdr_init(struct sk_buff *skb, * package primitive as an event and send to SAP event handler */ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, - unsigned char *dmac, unsigned char dsap) + const unsigned char *dmac, unsigned char dsap) { int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, sap->laddr.lsap, diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index a4eccb98220a..0ff490a73fae 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -26,7 +26,7 @@ #include <net/llc_c_st.h> #include <net/llc_conn.h> -static void llc_ui_format_mac(struct seq_file *seq, u8 *addr) +static void llc_ui_format_mac(struct seq_file *seq, const u8 *addr) { seq_printf(seq, "%pM", addr); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 2b5acb37587f..bd52ac3bee90 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -513,6 +513,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; + sta->cparams.ce_threshold_selector = 0; + sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 323d3d2d986f..500ed1b81250 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -129,15 +129,14 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) return -EINVAL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + dev_addr_set(dev, addr->sa_data); sdata->wpan_dev.extended_addr = extended_addr; /* update lowpan interface mac address when * wpan mac has been changed */ if (sdata->wpan_dev.lowpan_dev) - memcpy(sdata->wpan_dev.lowpan_dev->dev_addr, dev->dev_addr, - dev->addr_len); + dev_addr_set(sdata->wpan_dev.lowpan_dev, dev->dev_addr); return mac802154_wpan_update_llsec(dev); } @@ -615,6 +614,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { + u8 addr[IEEE802154_EXTENDED_ADDR_LEN]; struct net_device *ndev = NULL; struct ieee802154_sub_if_data *sdata = NULL; int ret; @@ -638,11 +638,12 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, switch (type) { case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; - if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) - ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr); - else - memcpy(ndev->dev_addr, ndev->perm_addr, - IEEE802154_EXTENDED_ADDR_LEN); + if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) { + ieee802154_le64_to_be64(addr, &extended_addr); + dev_addr_set(ndev, addr); + } else { + dev_addr_set(ndev, ndev->perm_addr); + } break; case NL802154_IFTYPE_MONITOR: ndev->type = ARPHRD_IEEE802154_MONITOR; diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig index 2cdf3d0a28c9..868c92272cbd 100644 --- a/net/mctp/Kconfig +++ b/net/mctp/Kconfig @@ -11,3 +11,8 @@ menuconfig MCTP This option enables core MCTP support. For communicating with other devices, you'll want to enable a driver for a specific hardware channel. + +config MCTP_TEST + bool "MCTP core tests" if !KUNIT_ALL_TESTS + depends on MCTP=y && KUNIT=y + default KUNIT_ALL_TESTS diff --git a/net/mctp/Makefile b/net/mctp/Makefile index 0171333384d7..6cd55233e685 100644 --- a/net/mctp/Makefile +++ b/net/mctp/Makefile @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MCTP) += mctp.o mctp-objs := af_mctp.o device.o route.o neigh.o + +# tests +obj-$(CONFIG_MCTP_TEST) += test/utils.o diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index a9526ac29dff..66a411d60b6c 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -16,6 +16,9 @@ #include <net/mctpdevice.h> #include <net/sock.h> +#define CREATE_TRACE_POINTS +#include <trace/events/mctp.h> + /* socket implementation */ static int mctp_release(struct socket *sock) @@ -223,16 +226,61 @@ static const struct proto_ops mctp_dgram_ops = { .sendpage = sock_no_sendpage, }; +static void mctp_sk_expire_keys(struct timer_list *timer) +{ + struct mctp_sock *msk = container_of(timer, struct mctp_sock, + key_expiry); + struct net *net = sock_net(&msk->sk); + unsigned long next_expiry, flags; + struct mctp_sk_key *key; + struct hlist_node *tmp; + bool next_expiry_valid = false; + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + + hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { + spin_lock(&key->lock); + + if (!time_after_eq(key->expiry, jiffies)) { + trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT); + key->valid = false; + hlist_del_rcu(&key->hlist); + hlist_del_rcu(&key->sklist); + spin_unlock(&key->lock); + mctp_key_unref(key); + continue; + } + + if (next_expiry_valid) { + if (time_before(key->expiry, next_expiry)) + next_expiry = key->expiry; + } else { + next_expiry = key->expiry; + next_expiry_valid = true; + } + spin_unlock(&key->lock); + } + + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + if (next_expiry_valid) + mod_timer(timer, next_expiry); +} + static int mctp_sk_init(struct sock *sk) { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); INIT_HLIST_HEAD(&msk->keys); + timer_setup(&msk->key_expiry, mctp_sk_expire_keys, 0); return 0; } static void mctp_sk_close(struct sock *sk, long timeout) { + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); + + del_timer_sync(&msk->key_expiry); sk_common_release(sk); } @@ -263,21 +311,23 @@ static void mctp_sk_unhash(struct sock *sk) /* remove tag allocations */ spin_lock_irqsave(&net->mctp.keys_lock, flags); hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { - hlist_del_rcu(&key->sklist); - hlist_del_rcu(&key->hlist); + hlist_del(&key->sklist); + hlist_del(&key->hlist); - spin_lock(&key->reasm_lock); + trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED); + + spin_lock(&key->lock); if (key->reasm_head) kfree_skb(key->reasm_head); key->reasm_head = NULL; key->reasm_dead = true; - spin_unlock(&key->reasm_lock); + key->valid = false; + spin_unlock(&key->lock); - kfree_rcu(key, rcu); + /* key is no longer on the lookup lists, unref */ + mctp_key_unref(key); } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - - synchronize_rcu(); } static struct proto mctp_proto = { @@ -385,7 +435,7 @@ static __exit void mctp_exit(void) sock_unregister(PF_MCTP); } -module_init(mctp_init); +subsys_initcall(mctp_init); module_exit(mctp_exit); MODULE_DESCRIPTION("MCTP core"); diff --git a/net/mctp/device.c b/net/mctp/device.c index b9f38e765f61..3827d62f52c9 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -35,14 +35,6 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->mctp_ptr); } -static void mctp_dev_destroy(struct mctp_dev *mdev) -{ - struct net_device *dev = mdev->dev; - - dev_put(dev); - kfree_rcu(mdev, rcu); -} - static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb, struct mctp_dev *mdev, mctp_eid_t eid) { @@ -255,6 +247,19 @@ static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +void mctp_dev_hold(struct mctp_dev *mdev) +{ + refcount_inc(&mdev->refs); +} + +void mctp_dev_put(struct mctp_dev *mdev) +{ + if (refcount_dec_and_test(&mdev->refs)) { + dev_put(mdev->dev); + kfree_rcu(mdev, rcu); + } +} + static struct mctp_dev *mctp_add_dev(struct net_device *dev) { struct mctp_dev *mdev; @@ -270,7 +275,9 @@ static struct mctp_dev *mctp_add_dev(struct net_device *dev) mdev->net = mctp_default_net(dev_net(dev)); /* associate to net_device */ + refcount_set(&mdev->refs, 1); rcu_assign_pointer(dev->mctp_ptr, mdev); + dev_hold(dev); mdev->dev = dev; @@ -330,12 +337,26 @@ static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr, return 0; } +/* Matches netdev types that should have MCTP handling */ +static bool mctp_known(struct net_device *dev) +{ + /* only register specific types (inc. NONE for TUN devices) */ + return dev->type == ARPHRD_MCTP || + dev->type == ARPHRD_LOOPBACK || + dev->type == ARPHRD_NONE; +} + static void mctp_unregister(struct net_device *dev) { struct mctp_dev *mdev; mdev = mctp_dev_get_rtnl(dev); - + if (mctp_known(dev) != (bool)mdev) { + // Sanity check, should match what was set in mctp_register + netdev_warn(dev, "%s: mdev pointer %d but type (%d) match is %d", + __func__, (bool)mdev, mctp_known(dev), dev->type); + return; + } if (!mdev) return; @@ -345,7 +366,7 @@ static void mctp_unregister(struct net_device *dev) mctp_neigh_remove_dev(mdev); kfree(mdev->addrs); - mctp_dev_destroy(mdev); + mctp_dev_put(mdev); } static int mctp_register(struct net_device *dev) @@ -353,11 +374,17 @@ static int mctp_register(struct net_device *dev) struct mctp_dev *mdev; /* Already registered? */ - if (rtnl_dereference(dev->mctp_ptr)) + mdev = rtnl_dereference(dev->mctp_ptr); + + if (mdev) { + if (!mctp_known(dev)) + netdev_warn(dev, "%s: mctp_dev set for unknown type %d", + __func__, dev->type); return 0; + } - /* only register specific types; MCTP-specific and loopback for now */ - if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK) + /* only register specific types */ + if (!mctp_known(dev)) return 0; mdev = mctp_add_dev(dev); diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c index 90ed2f02d1fb..5cc042121493 100644 --- a/net/mctp/neigh.c +++ b/net/mctp/neigh.c @@ -47,7 +47,7 @@ static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid, } INIT_LIST_HEAD(&neigh->list); neigh->dev = mdev; - dev_hold(neigh->dev->dev); + mctp_dev_hold(neigh->dev); neigh->eid = eid; neigh->source = source; memcpy(neigh->ha, lladdr, lladdr_len); @@ -63,7 +63,7 @@ static void __mctp_neigh_free(struct rcu_head *rcu) { struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu); - dev_put(neigh->dev->dev); + mctp_dev_put(neigh->dev); kfree(neigh); } diff --git a/net/mctp/route.c b/net/mctp/route.c index 5ca186d53cb0..82fb5ae524f6 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -11,6 +11,7 @@ */ #include <linux/idr.h> +#include <linux/kconfig.h> #include <linux/mctp.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> @@ -23,7 +24,10 @@ #include <net/netlink.h> #include <net/sock.h> +#include <trace/events/mctp.h> + static const unsigned int mctp_message_maxlen = 64 * 1024; +static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ; /* route output callbacks */ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) @@ -83,25 +87,43 @@ static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, return true; } +/* returns a key (with key->lock held, and refcounted), or NULL if no such + * key exists. + */ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, - mctp_eid_t peer) + mctp_eid_t peer, + unsigned long *irqflags) + __acquires(&key->lock) { struct mctp_sk_key *key, *ret; + unsigned long flags; struct mctp_hdr *mh; u8 tag; - WARN_ON(!rcu_read_lock_held()); - mh = mctp_hdr(skb); tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); ret = NULL; + spin_lock_irqsave(&net->mctp.keys_lock, flags); + + hlist_for_each_entry(key, &net->mctp.keys, hlist) { + if (!mctp_key_match(key, mh->dest, peer, tag)) + continue; - hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) { - if (mctp_key_match(key, mh->dest, peer, tag)) { + spin_lock(&key->lock); + if (key->valid) { + refcount_inc(&key->refs); ret = key; break; } + spin_unlock(&key->lock); + } + + if (ret) { + spin_unlock(&net->mctp.keys_lock); + *irqflags = flags; + } else { + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); } return ret; @@ -121,11 +143,19 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, key->local_addr = local; key->tag = tag; key->sk = &msk->sk; - spin_lock_init(&key->reasm_lock); + key->valid = true; + spin_lock_init(&key->lock); + refcount_set(&key->refs, 1); return key; } +void mctp_key_unref(struct mctp_sk_key *key) +{ + if (refcount_dec_and_test(&key->refs)) + kfree(key); +} + static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) { struct net *net = sock_net(&msk->sk); @@ -138,12 +168,20 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { if (mctp_key_match(tmp, key->local_addr, key->peer_addr, key->tag)) { - rc = -EEXIST; - break; + spin_lock(&tmp->lock); + if (tmp->valid) + rc = -EEXIST; + spin_unlock(&tmp->lock); + if (rc) + break; } } if (!rc) { + refcount_inc(&key->refs); + key->expiry = jiffies + mctp_key_lifetime; + timer_reduce(&msk->key_expiry, key->expiry); + hlist_add_head(&key->hlist, &net->mctp.keys); hlist_add_head(&key->sklist, &msk->keys); } @@ -153,28 +191,35 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) return rc; } -/* Must be called with key->reasm_lock, which it will release. Will schedule - * the key for an RCU free. +/* We're done with the key; unset valid and remove from lists. There may still + * be outstanding refs on the key though... */ static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net, unsigned long flags) - __releases(&key->reasm_lock) + __releases(&key->lock) { struct sk_buff *skb; skb = key->reasm_head; key->reasm_head = NULL; key->reasm_dead = true; - spin_unlock_irqrestore(&key->reasm_lock, flags); + key->valid = false; + spin_unlock_irqrestore(&key->lock, flags); spin_lock_irqsave(&net->mctp.keys_lock, flags); - hlist_del_rcu(&key->hlist); - hlist_del_rcu(&key->sklist); + hlist_del(&key->hlist); + hlist_del(&key->sklist); spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - kfree_rcu(key, rcu); + + /* one unref for the lists */ + mctp_key_unref(key); + + /* and one for the local reference */ + mctp_key_unref(key); if (skb) kfree_skb(skb); + } static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) @@ -248,8 +293,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) rcu_read_lock(); - /* lookup socket / reasm context, exactly matching (src,dest,tag) */ - key = mctp_lookup_key(net, skb, mh->src); + /* lookup socket / reasm context, exactly matching (src,dest,tag). + * we hold a ref on the key, and key->lock held. + */ + key = mctp_lookup_key(net, skb, mh->src, &f); if (flags & MCTP_HDR_FLAG_SOM) { if (key) { @@ -260,10 +307,12 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * key for reassembly - we'll create a more specific * one for future packets if required (ie, !EOM). */ - key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY); + key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); if (key) { msk = container_of(key->sk, struct mctp_sock, sk); + spin_unlock_irqrestore(&key->lock, f); + mctp_key_unref(key); key = NULL; } } @@ -282,11 +331,13 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) if (flags & MCTP_HDR_FLAG_EOM) { sock_queue_rcv_skb(&msk->sk, skb); if (key) { - spin_lock_irqsave(&key->reasm_lock, f); /* we've hit a pending reassembly; not much we * can do but drop it */ + trace_mctp_key_release(key, + MCTP_TRACE_KEY_REPLIED); __mctp_key_unlock_drop(key, net, f); + key = NULL; } rc = 0; goto out_unlock; @@ -303,7 +354,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) goto out_unlock; } - /* we can queue without the reasm lock here, as the + /* we can queue without the key lock here, as the * key isn't observable yet */ mctp_frag_queue(key, skb); @@ -318,17 +369,22 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) if (rc) kfree(key); - } else { - /* existing key: start reassembly */ - spin_lock_irqsave(&key->reasm_lock, f); + trace_mctp_key_acquire(key); + /* we don't need to release key->lock on exit */ + mctp_key_unref(key); + key = NULL; + + } else { if (key->reasm_head || key->reasm_dead) { /* duplicate start? drop everything */ + trace_mctp_key_release(key, + MCTP_TRACE_KEY_INVALIDATED); __mctp_key_unlock_drop(key, net, f); rc = -EEXIST; + key = NULL; } else { rc = mctp_frag_queue(key, skb); - spin_unlock_irqrestore(&key->reasm_lock, f); } } @@ -337,8 +393,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * using the message-specific key */ - spin_lock_irqsave(&key->reasm_lock, f); - /* we need to be continuing an existing reassembly... */ if (!key->reasm_head) rc = -EINVAL; @@ -351,9 +405,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) if (!rc && flags & MCTP_HDR_FLAG_EOM) { sock_queue_rcv_skb(key->sk, key->reasm_head); key->reasm_head = NULL; + trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED); __mctp_key_unlock_drop(key, net, f); - } else { - spin_unlock_irqrestore(&key->reasm_lock, f); + key = NULL; } } else { @@ -363,6 +417,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) out_unlock: rcu_read_unlock(); + if (key) { + spin_unlock_irqrestore(&key->lock, f); + mctp_key_unref(key); + } out: if (rc) kfree_skb(skb); @@ -412,7 +470,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) static void mctp_route_release(struct mctp_route *rt) { if (refcount_dec_and_test(&rt->refs)) { - dev_put(rt->dev->dev); + mctp_dev_put(rt->dev); kfree_rcu(rt, rcu); } } @@ -454,11 +512,15 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, lockdep_assert_held(&mns->keys_lock); + key->expiry = jiffies + mctp_key_lifetime; + timer_reduce(&msk->key_expiry, key->expiry); + /* we hold the net->key_lock here, allowing updates to both * then net and sk */ hlist_add_head_rcu(&key->hlist, &mns->keys); hlist_add_head_rcu(&key->sklist, &msk->keys); + refcount_inc(&key->refs); } /* Allocate a locally-owned tag value for (saddr, daddr), and reserve @@ -474,6 +536,10 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk, int rc = -EAGAIN; u8 tagbits; + /* for NULL destination EIDs, we may get a response from any peer */ + if (daddr == MCTP_ADDR_NULL) + daddr = MCTP_ADDR_ANY; + /* be optimistic, alloc now */ key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL); if (!key) @@ -488,14 +554,26 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk, * tags. If we find a conflict, clear that bit from tagbits */ hlist_for_each_entry(tmp, &mns->keys, hlist) { + /* We can check the lookup fields (*_addr, tag) without the + * lock held, they don't change over the lifetime of the key. + */ + /* if we don't own the tag, it can't conflict */ if (tmp->tag & MCTP_HDR_FLAG_TO) continue; - if ((tmp->peer_addr == daddr || - tmp->peer_addr == MCTP_ADDR_ANY) && - tmp->local_addr == saddr) + if (!((tmp->peer_addr == daddr || + tmp->peer_addr == MCTP_ADDR_ANY) && + tmp->local_addr == saddr)) + continue; + + spin_lock(&tmp->lock); + /* key must still be valid. If we find a match, clear the + * potential tag value + */ + if (tmp->valid) tagbits &= ~(1 << tmp->tag); + spin_unlock(&tmp->lock); if (!tagbits) break; @@ -504,7 +582,12 @@ static int mctp_alloc_local_tag(struct mctp_sock *msk, if (tagbits) { key->tag = __ffs(tagbits); mctp_reserve_tag(net, key, msk); + trace_mctp_key_acquire(key); + *tagp = key->tag; + /* done with the key in this scope */ + mctp_key_unref(key); + key = NULL; rc = 0; } @@ -552,6 +635,20 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, return rt; } +static struct mctp_route *mctp_route_lookup_null(struct net *net, + struct net_device *dev) +{ + struct mctp_route *rt; + + list_for_each_entry_rcu(rt, &net->mctp.routes, list) { + if (rt->dev->dev == dev && rt->type == RTN_LOCAL && + refcount_inc_not_zero(&rt->refs)) + return rt; + } + + return NULL; +} + /* sends a skb to rt and releases the route. */ int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb) { @@ -741,7 +838,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, rt->max = daddr_start + daddr_extent; rt->mtu = mtu; rt->dev = mdev; - dev_hold(rt->dev->dev); + mctp_dev_hold(rt->dev); rt->type = type; rt->output = rtfn; @@ -821,13 +918,18 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct net *net = dev_net(dev); + struct mctp_dev *mdev; struct mctp_skb_cb *cb; struct mctp_route *rt; struct mctp_hdr *mh; - /* basic non-data sanity checks */ - if (dev->type != ARPHRD_MCTP) + rcu_read_lock(); + mdev = __mctp_dev_get(dev); + rcu_read_unlock(); + if (!mdev) { + /* basic non-data sanity checks */ goto err_drop; + } if (!pskb_may_pull(skb, sizeof(struct mctp_hdr))) goto err_drop; @@ -841,11 +943,14 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, goto err_drop; cb = __mctp_cb(skb); - rcu_read_lock(); - cb->net = READ_ONCE(__mctp_dev_get(dev)->net); - rcu_read_unlock(); + cb->net = READ_ONCE(mdev->net); rt = mctp_route_lookup(net, cb->net, mh->dest); + + /* NULL EID, but addressed to our physical address */ + if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) + rt = mctp_route_lookup_null(net, dev); + if (!rt) goto err_drop; @@ -926,10 +1031,15 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = { + [RTAX_MTU] = { .type = NLA_U32 }, +}; + static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RTA_MAX + 1]; + struct nlattr *tbx[RTAX_MAX + 1]; mctp_eid_t daddr_start; struct mctp_dev *mdev; struct rtmsg *rtm; @@ -946,8 +1056,15 @@ static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - /* TODO: parse mtu from nlparse */ mtu = 0; + if (tb[RTA_METRICS]) { + rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS], + rta_metrics_policy, NULL); + if (rc < 0) + return rc; + if (tbx[RTAX_MTU]) + mtu = nla_get_u32(tbx[RTAX_MTU]); + } if (rtm->rtm_type != RTN_UNICAST) return -EINVAL; @@ -1116,3 +1233,7 @@ void __exit mctp_routes_exit(void) rtnl_unregister(PF_MCTP, RTM_GETROUTE); dev_remove_pack(&mctp_packet_type); } + +#if IS_ENABLED(CONFIG_MCTP_TEST) +#include "test/route-test.c" +#endif diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c new file mode 100644 index 000000000000..36fac3daf86a --- /dev/null +++ b/net/mctp/test/route-test.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <kunit/test.h> + +#include "utils.h" + +struct mctp_test_route { + struct mctp_route rt; + struct sk_buff_head pkts; +}; + +static int mctp_test_route_output(struct mctp_route *rt, struct sk_buff *skb) +{ + struct mctp_test_route *test_rt = container_of(rt, struct mctp_test_route, rt); + + skb_queue_tail(&test_rt->pkts, skb); + + return 0; +} + +/* local version of mctp_route_alloc() */ +static struct mctp_test_route *mctp_route_test_alloc(void) +{ + struct mctp_test_route *rt; + + rt = kzalloc(sizeof(*rt), GFP_KERNEL); + if (!rt) + return NULL; + + INIT_LIST_HEAD(&rt->rt.list); + refcount_set(&rt->rt.refs, 1); + rt->rt.output = mctp_test_route_output; + + skb_queue_head_init(&rt->pkts); + + return rt; +} + +static struct mctp_test_route *mctp_test_create_route(struct net *net, + struct mctp_dev *dev, + mctp_eid_t eid, + unsigned int mtu) +{ + struct mctp_test_route *rt; + + rt = mctp_route_test_alloc(); + if (!rt) + return NULL; + + rt->rt.min = eid; + rt->rt.max = eid; + rt->rt.mtu = mtu; + rt->rt.type = RTN_UNSPEC; + if (dev) + mctp_dev_hold(dev); + rt->rt.dev = dev; + + list_add_rcu(&rt->rt.list, &net->mctp.routes); + + return rt; +} + +static void mctp_test_route_destroy(struct kunit *test, + struct mctp_test_route *rt) +{ + unsigned int refs; + + rtnl_lock(); + list_del_rcu(&rt->rt.list); + rtnl_unlock(); + + skb_queue_purge(&rt->pkts); + if (rt->rt.dev) + mctp_dev_put(rt->rt.dev); + + refs = refcount_read(&rt->rt.refs); + KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance"); + + kfree_rcu(&rt->rt, rcu); +} + +static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, + unsigned int data_len) +{ + size_t hdr_len = sizeof(*hdr); + struct sk_buff *skb; + unsigned int i; + u8 *buf; + + skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); + if (!skb) + return NULL; + + memcpy(skb_put(skb, hdr_len), hdr, hdr_len); + + buf = skb_put(skb, data_len); + for (i = 0; i < data_len; i++) + buf[i] = i & 0xff; + + return skb; +} + +static struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, + const void *data, + size_t data_len) +{ + size_t hdr_len = sizeof(*hdr); + struct sk_buff *skb; + + skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); + if (!skb) + return NULL; + + memcpy(skb_put(skb, hdr_len), hdr, hdr_len); + memcpy(skb_put(skb, data_len), data, data_len); + + return skb; +} + +#define mctp_test_create_skb_data(h, d) \ + __mctp_test_create_skb_data(h, d, sizeof(*d)) + +struct mctp_frag_test { + unsigned int mtu; + unsigned int msgsize; + unsigned int n_frags; +}; + +static void mctp_test_fragment(struct kunit *test) +{ + const struct mctp_frag_test *params; + int rc, i, n, mtu, msgsize; + struct mctp_test_route *rt; + struct sk_buff *skb; + struct mctp_hdr hdr; + u8 seq; + + params = test->param_value; + mtu = params->mtu; + msgsize = params->msgsize; + + hdr.ver = 1; + hdr.src = 8; + hdr.dest = 10; + hdr.flags_seq_tag = MCTP_HDR_FLAG_TO; + + skb = mctp_test_create_skb(&hdr, msgsize); + KUNIT_ASSERT_TRUE(test, skb); + + rt = mctp_test_create_route(&init_net, NULL, 10, mtu); + KUNIT_ASSERT_TRUE(test, rt); + + /* The refcount would usually be incremented as part of a route lookup, + * but we're setting the route directly here. + */ + refcount_inc(&rt->rt.refs); + + rc = mctp_do_fragment_route(&rt->rt, skb, mtu, MCTP_TAG_OWNER); + KUNIT_EXPECT_FALSE(test, rc); + + n = rt->pkts.qlen; + + KUNIT_EXPECT_EQ(test, n, params->n_frags); + + for (i = 0;; i++) { + struct mctp_hdr *hdr2; + struct sk_buff *skb2; + u8 tag_mask, seq2; + bool first, last; + + first = i == 0; + last = i == (n - 1); + + skb2 = skb_dequeue(&rt->pkts); + + if (!skb2) + break; + + hdr2 = mctp_hdr(skb2); + + tag_mask = MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO; + + KUNIT_EXPECT_EQ(test, hdr2->ver, hdr.ver); + KUNIT_EXPECT_EQ(test, hdr2->src, hdr.src); + KUNIT_EXPECT_EQ(test, hdr2->dest, hdr.dest); + KUNIT_EXPECT_EQ(test, hdr2->flags_seq_tag & tag_mask, + hdr.flags_seq_tag & tag_mask); + + KUNIT_EXPECT_EQ(test, + !!(hdr2->flags_seq_tag & MCTP_HDR_FLAG_SOM), first); + KUNIT_EXPECT_EQ(test, + !!(hdr2->flags_seq_tag & MCTP_HDR_FLAG_EOM), last); + + seq2 = (hdr2->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT) & + MCTP_HDR_SEQ_MASK; + + if (first) { + seq = seq2; + } else { + seq++; + KUNIT_EXPECT_EQ(test, seq2, seq & MCTP_HDR_SEQ_MASK); + } + + if (!last) + KUNIT_EXPECT_EQ(test, skb2->len, mtu); + else + KUNIT_EXPECT_LE(test, skb2->len, mtu); + + kfree_skb(skb2); + } + + mctp_test_route_destroy(test, rt); +} + +static const struct mctp_frag_test mctp_frag_tests[] = { + {.mtu = 68, .msgsize = 63, .n_frags = 1}, + {.mtu = 68, .msgsize = 64, .n_frags = 1}, + {.mtu = 68, .msgsize = 65, .n_frags = 2}, + {.mtu = 68, .msgsize = 66, .n_frags = 2}, + {.mtu = 68, .msgsize = 127, .n_frags = 2}, + {.mtu = 68, .msgsize = 128, .n_frags = 2}, + {.mtu = 68, .msgsize = 129, .n_frags = 3}, + {.mtu = 68, .msgsize = 130, .n_frags = 3}, +}; + +static void mctp_frag_test_to_desc(const struct mctp_frag_test *t, char *desc) +{ + sprintf(desc, "mtu %d len %d -> %d frags", + t->msgsize, t->mtu, t->n_frags); +} + +KUNIT_ARRAY_PARAM(mctp_frag, mctp_frag_tests, mctp_frag_test_to_desc); + +struct mctp_rx_input_test { + struct mctp_hdr hdr; + bool input; +}; + +static void mctp_test_rx_input(struct kunit *test) +{ + const struct mctp_rx_input_test *params; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb; + + params = test->param_value; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + skb = mctp_test_create_skb(¶ms->hdr, 1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + __mctp_cb(skb); + + mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL); + + KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input); + + mctp_test_route_destroy(test, rt); + mctp_test_destroy_dev(dev); +} + +#define RX_HDR(_ver, _src, _dest, _fst) \ + { .ver = _ver, .src = _src, .dest = _dest, .flags_seq_tag = _fst } + +/* we have a route for EID 8 only */ +static const struct mctp_rx_input_test mctp_rx_input_tests[] = { + { .hdr = RX_HDR(1, 10, 8, 0), .input = true }, + { .hdr = RX_HDR(1, 10, 9, 0), .input = false }, /* no input route */ + { .hdr = RX_HDR(2, 10, 8, 0), .input = false }, /* invalid version */ +}; + +static void mctp_rx_input_test_to_desc(const struct mctp_rx_input_test *t, + char *desc) +{ + sprintf(desc, "{%x,%x,%x,%x}", t->hdr.ver, t->hdr.src, t->hdr.dest, + t->hdr.flags_seq_tag); +} + +KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests, + mctp_rx_input_test_to_desc); + +/* set up a local dev, route on EID 8, and a socket listening on type 0 */ +static void __mctp_route_test_init(struct kunit *test, + struct mctp_test_dev **devp, + struct mctp_test_route **rtp, + struct socket **sockp) +{ + struct sockaddr_mctp addr; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + addr.smctp_family = AF_MCTP; + addr.smctp_network = MCTP_NET_ANY; + addr.smctp_addr.s_addr = 8; + addr.smctp_type = 0; + rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr)); + KUNIT_ASSERT_EQ(test, rc, 0); + + *rtp = rt; + *devp = dev; + *sockp = sock; +} + +static void __mctp_route_test_fini(struct kunit *test, + struct mctp_test_dev *dev, + struct mctp_test_route *rt, + struct socket *sock) +{ + sock_release(sock); + mctp_test_route_destroy(test, rt); + mctp_test_destroy_dev(dev); +} + +struct mctp_route_input_sk_test { + struct mctp_hdr hdr; + u8 type; + bool deliver; +}; + +static void mctp_test_route_input_sk(struct kunit *test) +{ + const struct mctp_route_input_sk_test *params; + struct sk_buff *skb, *skb2; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + int rc; + + params = test->param_value; + + __mctp_route_test_init(test, &dev, &rt, &sock); + + skb = mctp_test_create_skb_data(¶ms->hdr, ¶ms->type); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + skb->dev = dev->ndev; + __mctp_cb(skb); + + rc = mctp_route_input(&rt->rt, skb); + + if (params->deliver) { + KUNIT_EXPECT_EQ(test, rc, 0); + + skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); + KUNIT_EXPECT_EQ(test, skb->len, 1); + + skb_free_datagram(sock->sk, skb2); + + } else { + KUNIT_EXPECT_NE(test, rc, 0); + skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + KUNIT_EXPECT_PTR_EQ(test, skb2, NULL); + } + + __mctp_route_test_fini(test, dev, rt, sock); +} + +#define FL_S (MCTP_HDR_FLAG_SOM) +#define FL_E (MCTP_HDR_FLAG_EOM) +#define FL_T (MCTP_HDR_FLAG_TO) + +static const struct mctp_route_input_sk_test mctp_route_input_sk_tests[] = { + { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 0, .deliver = true }, + { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 1, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E), .type = 0, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_E | FL_T), .type = 0, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, FL_T), .type = 0, .deliver = false }, + { .hdr = RX_HDR(1, 10, 8, 0), .type = 0, .deliver = false }, +}; + +static void mctp_route_input_sk_to_desc(const struct mctp_route_input_sk_test *t, + char *desc) +{ + sprintf(desc, "{%x,%x,%x,%x} type %d", t->hdr.ver, t->hdr.src, + t->hdr.dest, t->hdr.flags_seq_tag, t->type); +} + +KUNIT_ARRAY_PARAM(mctp_route_input_sk, mctp_route_input_sk_tests, + mctp_route_input_sk_to_desc); + +struct mctp_route_input_sk_reasm_test { + const char *name; + struct mctp_hdr hdrs[4]; + int n_hdrs; + int rx_len; +}; + +static void mctp_test_route_input_sk_reasm(struct kunit *test) +{ + const struct mctp_route_input_sk_reasm_test *params; + struct sk_buff *skb, *skb2; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + int i, rc; + u8 c; + + params = test->param_value; + + __mctp_route_test_init(test, &dev, &rt, &sock); + + for (i = 0; i < params->n_hdrs; i++) { + c = i; + skb = mctp_test_create_skb_data(¶ms->hdrs[i], &c); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + skb->dev = dev->ndev; + __mctp_cb(skb); + + rc = mctp_route_input(&rt->rt, skb); + } + + skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + + if (params->rx_len) { + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); + KUNIT_EXPECT_EQ(test, skb2->len, params->rx_len); + skb_free_datagram(sock->sk, skb2); + + } else { + KUNIT_EXPECT_PTR_EQ(test, skb2, NULL); + } + + __mctp_route_test_fini(test, dev, rt, sock); +} + +#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_T | (f) | ((s) << MCTP_HDR_SEQ_SHIFT)) + +static const struct mctp_route_input_sk_reasm_test mctp_route_input_sk_reasm_tests[] = { + { + .name = "single packet", + .hdrs = { + RX_FRAG(FL_S | FL_E, 0), + }, + .n_hdrs = 1, + .rx_len = 1, + }, + { + .name = "single packet, offset seq", + .hdrs = { + RX_FRAG(FL_S | FL_E, 1), + }, + .n_hdrs = 1, + .rx_len = 1, + }, + { + .name = "start & end packets", + .hdrs = { + RX_FRAG(FL_S, 0), + RX_FRAG(FL_E, 1), + }, + .n_hdrs = 2, + .rx_len = 2, + }, + { + .name = "start & end packets, offset seq", + .hdrs = { + RX_FRAG(FL_S, 1), + RX_FRAG(FL_E, 2), + }, + .n_hdrs = 2, + .rx_len = 2, + }, + { + .name = "start & end packets, out of order", + .hdrs = { + RX_FRAG(FL_E, 1), + RX_FRAG(FL_S, 0), + }, + .n_hdrs = 2, + .rx_len = 0, + }, + { + .name = "start, middle & end packets", + .hdrs = { + RX_FRAG(FL_S, 0), + RX_FRAG(0, 1), + RX_FRAG(FL_E, 2), + }, + .n_hdrs = 3, + .rx_len = 3, + }, + { + .name = "missing seq", + .hdrs = { + RX_FRAG(FL_S, 0), + RX_FRAG(FL_E, 2), + }, + .n_hdrs = 2, + .rx_len = 0, + }, + { + .name = "seq wrap", + .hdrs = { + RX_FRAG(FL_S, 3), + RX_FRAG(FL_E, 0), + }, + .n_hdrs = 2, + .rx_len = 2, + }, +}; + +static void mctp_route_input_sk_reasm_to_desc( + const struct mctp_route_input_sk_reasm_test *t, + char *desc) +{ + sprintf(desc, "%s", t->name); +} + +KUNIT_ARRAY_PARAM(mctp_route_input_sk_reasm, mctp_route_input_sk_reasm_tests, + mctp_route_input_sk_reasm_to_desc); + +static struct kunit_case mctp_test_cases[] = { + KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), + KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), + KUNIT_CASE_PARAM(mctp_test_route_input_sk, mctp_route_input_sk_gen_params), + KUNIT_CASE_PARAM(mctp_test_route_input_sk_reasm, + mctp_route_input_sk_reasm_gen_params), + {} +}; + +static struct kunit_suite mctp_test_suite = { + .name = "mctp", + .test_cases = mctp_test_cases, +}; + +kunit_test_suite(mctp_test_suite); diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c new file mode 100644 index 000000000000..cc6b8803aa9d --- /dev/null +++ b/net/mctp/test/utils.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/netdevice.h> +#include <linux/mctp.h> +#include <linux/if_arp.h> + +#include <net/mctpdevice.h> +#include <net/pkt_sched.h> + +#include "utils.h" + +static netdev_tx_t mctp_test_dev_tx(struct sk_buff *skb, + struct net_device *ndev) +{ + kfree(skb); + return NETDEV_TX_OK; +} + +static const struct net_device_ops mctp_test_netdev_ops = { + .ndo_start_xmit = mctp_test_dev_tx, +}; + +static void mctp_test_dev_setup(struct net_device *ndev) +{ + ndev->type = ARPHRD_MCTP; + ndev->mtu = MCTP_DEV_TEST_MTU; + ndev->hard_header_len = 0; + ndev->addr_len = 0; + ndev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + ndev->flags = IFF_NOARP; + ndev->netdev_ops = &mctp_test_netdev_ops; + ndev->needs_free_netdev = true; +} + +struct mctp_test_dev *mctp_test_create_dev(void) +{ + struct mctp_test_dev *dev; + struct net_device *ndev; + int rc; + + ndev = alloc_netdev(sizeof(*dev), "mctptest%d", NET_NAME_ENUM, + mctp_test_dev_setup); + if (!ndev) + return NULL; + + dev = netdev_priv(ndev); + dev->ndev = ndev; + + rc = register_netdev(ndev); + if (rc) { + free_netdev(ndev); + return NULL; + } + + rcu_read_lock(); + dev->mdev = __mctp_dev_get(ndev); + mctp_dev_hold(dev->mdev); + rcu_read_unlock(); + + return dev; +} + +void mctp_test_destroy_dev(struct mctp_test_dev *dev) +{ + mctp_dev_put(dev->mdev); + unregister_netdev(dev->ndev); +} diff --git a/net/mctp/test/utils.h b/net/mctp/test/utils.h new file mode 100644 index 000000000000..df6aa1c03440 --- /dev/null +++ b/net/mctp/test/utils.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NET_MCTP_TEST_UTILS_H +#define __NET_MCTP_TEST_UTILS_H + +#include <kunit/test.h> + +#define MCTP_DEV_TEST_MTU 68 + +struct mctp_test_dev { + struct net_device *ndev; + struct mctp_dev *mdev; +}; + +struct mctp_test_dev; + +struct mctp_test_dev *mctp_test_create_dev(void); +void mctp_test_destroy_dev(struct mctp_test_dev *dev); + +#endif /* __NET_MCTP_TEST_UTILS_H */ diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index b21ff9be04c6..3240b72271a7 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -72,6 +72,7 @@ bool mptcp_mib_alloc(struct net *net) void mptcp_seq_show(struct seq_file *seq) { + unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1]; struct net *net = seq->private; int i; @@ -81,17 +82,13 @@ void mptcp_seq_show(struct seq_file *seq) seq_puts(seq, "\nMPTcpExt:"); - if (!net->mib.mptcp_statistics) { - for (i = 0; mptcp_snmp_list[i].name; i++) - seq_puts(seq, " 0"); - - seq_putc(seq, '\n'); - return; - } + memset(sum, 0, sizeof(sum)); + if (net->mib.mptcp_statistics) + snmp_get_cpu_field_batch(sum, mptcp_snmp_list, + net->mib.mptcp_statistics); for (i = 0; mptcp_snmp_list[i].name; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.mptcp_statistics, - mptcp_snmp_list[i].entry)); + seq_printf(seq, " %lu", sum[i]); + seq_putc(seq, '\n'); } diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 292374fb0779..f44125dd6697 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -113,37 +113,13 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_info *info = _info; - u32 flags = 0; - bool slow; - u8 val; r->idiag_rqueue = sk_rmem_alloc_get(sk); r->idiag_wqueue = sk_wmem_alloc_get(sk); if (!info) return; - slow = lock_sock_fast(sk); - info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); - info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); - info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); - info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); - info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); - val = mptcp_pm_get_add_addr_signal_max(msk); - info->mptcpi_add_addr_signal_max = val; - val = mptcp_pm_get_add_addr_accept_max(msk); - info->mptcpi_add_addr_accepted_max = val; - info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); - if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) - flags |= MPTCP_INFO_FLAG_FALLBACK; - if (READ_ONCE(msk->can_ack)) - flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; - info->mptcpi_flags = flags; - info->mptcpi_token = READ_ONCE(msk->token); - info->mptcpi_write_seq = READ_ONCE(msk->write_seq); - info->mptcpi_snd_una = READ_ONCE(msk->snd_una); - info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); - info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); - unlock_sock_fast(sk, slow); + mptcp_diag_fill_info(msk, info); } static const struct inet_diag_handler mptcp_diag_handler = { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c41273cefc51..422f4acfb3e6 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -748,9 +748,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, /* can't send MP_PRIO with MPC, as they share the same option space: * 'backup'. Also it makes no sense at all */ - if (!subflow->send_mp_prio || - ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | - OPTION_MPTCP_MPC_ACK) & opts->suboptions)) + if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC)) return false; /* account for the trailing 'nop' option */ @@ -1019,11 +1017,9 @@ static void ack_update_msk(struct mptcp_sock *msk, old_snd_una = msk->snd_una; new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); - /* ACK for data not even sent yet and even above recovery bound? Ignore.*/ - if (unlikely(after64(new_snd_una, snd_nxt))) { - if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt)) - new_snd_una = old_snd_una; - } + /* ACK for data not even sent yet? Ignore.*/ + if (unlikely(after64(new_snd_una, snd_nxt))) + new_snd_una = old_snd_una; new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; @@ -1329,8 +1325,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } } - } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | - OPTION_MPTCP_MPC_ACK) & opts->suboptions) { + } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 050eea231528..7b96be1e9f14 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -654,9 +654,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) } } -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - u8 bkup) +static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + u8 bkup) { struct mptcp_subflow_context *subflow; @@ -2052,6 +2052,9 @@ static int __net_init pm_nl_init_net(struct net *net) struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); INIT_LIST_HEAD_RCU(&pernet->local_addr_list); + + /* Cit. 2 subflows ought to be enough for anybody. */ + pernet->subflows_max = 2; pernet->next_id = 1; pernet->stale_loss_cnt = 4; spin_lock_init(&pernet->lock); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d073b2111382..cd6b11c9b54d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -953,9 +953,7 @@ static void __mptcp_update_wmem(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); -#ifdef CONFIG_LOCKDEP - WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); -#endif + lockdep_assert_held_once(&sk->sk_lock.slock); if (!msk->wmem_reserved) return; @@ -1104,7 +1102,8 @@ out: if (cleaned && tcp_under_memory_pressure(sk)) __mptcp_mem_reclaim_partial(sk); - if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) { + if (snd_una == READ_ONCE(msk->snd_nxt) && + snd_una == READ_ONCE(msk->write_seq)) { if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) mptcp_stop_timer(sk); } else { @@ -1114,9 +1113,8 @@ out: static void __mptcp_clean_una_wakeup(struct sock *sk) { -#ifdef CONFIG_LOCKDEP - WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); -#endif + lockdep_assert_held_once(&sk->sk_lock.slock); + __mptcp_clean_una(sk); mptcp_write_space(sk); } @@ -1221,6 +1219,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp) if (likely(__mptcp_add_ext(skb, gfp))) { skb_reserve(skb, MAX_TCP_HEADER); skb->reserved_tailroom = skb->end - skb->tail; + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } __kfree_skb(skb); @@ -1230,31 +1229,23 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp) return NULL; } -static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp) +static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp) { struct sk_buff *skb; - if (ssk->sk_tx_skb_cache) { - skb = ssk->sk_tx_skb_cache; - if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) && - !__mptcp_add_ext(skb, gfp))) - return false; - return true; - } - skb = __mptcp_do_alloc_tx_skb(sk, gfp); if (!skb) - return false; + return NULL; if (likely(sk_wmem_schedule(ssk, skb->truesize))) { - ssk->sk_tx_skb_cache = skb; - return true; + tcp_skb_entail(ssk, skb); + return skb; } kfree_skb(skb); - return false; + return NULL; } -static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held) +static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held) { gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation; @@ -1284,23 +1275,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_sendmsg_info *info) { u64 data_seq = dfrag->data_seq + info->sent; + int offset = dfrag->offset + info->sent; struct mptcp_sock *msk = mptcp_sk(sk); bool zero_window_probe = false; struct mptcp_ext *mpext = NULL; - struct sk_buff *skb, *tail; - bool must_collapse = false; - int size_bias = 0; - int avail_size; - size_t ret = 0; + bool can_coalesce = false; + bool reuse_skb = true; + struct sk_buff *skb; + size_t copy; + int i; pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u", msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); + if (WARN_ON_ONCE(info->sent > info->limit || + info->limit > dfrag->data_len)) + return 0; + /* compute send limit */ info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); - avail_size = info->size_goal; + copy = info->size_goal; + skb = tcp_write_queue_tail(ssk); - if (skb) { + if (skb && copy > skb->len) { /* Limit the write to the size available in the * current skb, if any, so that we create at most a new skb. * Explicitly tells TCP internals to avoid collapsing on later @@ -1313,62 +1310,80 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, goto alloc_skb; } - must_collapse = (info->size_goal > skb->len) && - (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags); - if (must_collapse) { - size_bias = skb->len; - avail_size = info->size_goal - skb->len; + i = skb_shinfo(skb)->nr_frags; + can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset); + if (!can_coalesce && i >= sysctl_max_skb_frags) { + tcp_mark_push(tcp_sk(ssk), skb); + goto alloc_skb; } - } + copy -= skb->len; + } else { alloc_skb: - if (!must_collapse && - !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held)) - return 0; + skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held); + if (!skb) + return -ENOMEM; + + i = skb_shinfo(skb)->nr_frags; + reuse_skb = false; + mpext = skb_ext_find(skb, SKB_EXT_MPTCP); + } /* Zero window and all data acked? Probe. */ - avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size); - if (avail_size == 0) { + copy = mptcp_check_allowed_size(msk, data_seq, copy); + if (copy == 0) { u64 snd_una = READ_ONCE(msk->snd_una); - if (skb || snd_una != msk->snd_nxt) + if (snd_una != msk->snd_nxt) { + tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk)); return 0; + } + zero_window_probe = true; data_seq = snd_una - 1; - avail_size = 1; - } + copy = 1; - if (WARN_ON_ONCE(info->sent > info->limit || - info->limit > dfrag->data_len)) - return 0; + /* all mptcp-level data is acked, no skbs should be present into the + * ssk write queue + */ + WARN_ON_ONCE(reuse_skb); + } - ret = info->limit - info->sent; - tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags, - dfrag->page, dfrag->offset + info->sent, &ret); - if (!tail) { - tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk)); + copy = min_t(size_t, copy, info->limit - info->sent); + if (!sk_wmem_schedule(ssk, copy)) { + tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk)); return -ENOMEM; } - /* if the tail skb is still the cached one, collapsing really happened. - */ - if (skb == tail) { - TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH; - mpext->data_len += ret; + if (can_coalesce) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + get_page(dfrag->page); + skb_fill_page_desc(skb, i, dfrag->page, offset, copy); + } + + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; + sk_wmem_queued_add(ssk, copy); + sk_mem_charge(ssk, copy); + skb->ip_summed = CHECKSUM_PARTIAL; + WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy); + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + + /* on skb reuse we just need to update the DSS len */ + if (reuse_skb) { + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + mpext->data_len += copy; WARN_ON_ONCE(zero_window_probe); goto out; } - mpext = skb_ext_find(tail, SKB_EXT_MPTCP); - if (WARN_ON_ONCE(!mpext)) { - /* should never reach here, stream corrupted */ - return -EINVAL; - } - memset(mpext, 0, sizeof(*mpext)); mpext->data_seq = data_seq; mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq; - mpext->data_len = ret; + mpext->data_len = copy; mpext->use_map = 1; mpext->dsn64 = 1; @@ -1377,18 +1392,18 @@ alloc_skb: mpext->dsn64); if (zero_window_probe) { - mptcp_subflow_ctx(ssk)->rel_write_seq += ret; + mptcp_subflow_ctx(ssk)->rel_write_seq += copy; mpext->frozen = 1; if (READ_ONCE(msk->csum_enabled)) - mptcp_update_data_checksum(tail, ret); + mptcp_update_data_checksum(skb, copy); tcp_push_pending_frames(ssk); return 0; } out: if (READ_ONCE(msk->csum_enabled)) - mptcp_update_data_checksum(tail, ret); - mptcp_subflow_ctx(ssk)->rel_write_seq += ret; - return ret; + mptcp_update_data_checksum(skb, copy); + mptcp_subflow_ctx(ssk)->rel_write_seq += copy; + return copy; } #define MPTCP_SEND_BURST_SIZE ((1 << 16) - \ @@ -1505,6 +1520,38 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk, release_sock(ssk); } +static void mptcp_update_post_push(struct mptcp_sock *msk, + struct mptcp_data_frag *dfrag, + u32 sent) +{ + u64 snd_nxt_new = dfrag->data_seq; + + dfrag->already_sent += sent; + + msk->snd_burst -= sent; + + snd_nxt_new += dfrag->already_sent; + + /* snd_nxt_new can be smaller than snd_nxt in case mptcp + * is recovering after a failover. In that event, this re-sends + * old segments. + * + * Thus compute snd_nxt_new candidate based on + * the dfrag->data_seq that was sent and the data + * that has been handed to the subflow for transmission + * and skip update in case it was old dfrag. + */ + if (likely(after64(snd_nxt_new, msk->snd_nxt))) + msk->snd_nxt = snd_nxt_new; +} + +static void mptcp_check_and_set_pending(struct sock *sk) +{ + if (mptcp_send_head(sk) && + !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); +} + void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; @@ -1548,12 +1595,10 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) } info.sent += ret; - dfrag->already_sent += ret; - msk->snd_nxt += ret; - msk->snd_burst -= ret; - msk->tx_pending_data -= ret; copied += ret; len -= ret; + + mptcp_update_post_push(msk, dfrag, ret); } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); } @@ -1606,13 +1651,11 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) goto out; info.sent += ret; - dfrag->already_sent += ret; - msk->snd_nxt += ret; - msk->snd_burst -= ret; - msk->tx_pending_data -= ret; copied += ret; len -= ret; first = false; + + mptcp_update_post_push(msk, dfrag, ret); } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); } @@ -1722,7 +1765,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) frag_truesize += psize; pfrag->offset += frag_truesize; WRITE_ONCE(msk->write_seq, msk->write_seq + psize); - msk->tx_pending_data += psize; /* charge data on mptcp pending queue to the msk socket * Note: we charge such data both to sk and ssk @@ -2183,15 +2225,11 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) return false; } - /* will accept ack for reijected data before re-sending them */ - if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt)) - msk->recovery_snd_nxt = msk->snd_nxt; + msk->recovery_snd_nxt = msk->snd_nxt; msk->recovery = true; mptcp_data_unlock(sk); msk->first_pending = rtx_head; - msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq; - msk->snd_nxt = rtx_head->data_seq; msk->snd_burst = 0; /* be sure to clear the "sent status" on all re-injected fragments */ @@ -2353,6 +2391,9 @@ static void __mptcp_retrans(struct sock *sk) int ret; mptcp_clean_una_wakeup(sk); + + /* first check ssk: need to kick "stale" logic */ + ssk = mptcp_subflow_get_retrans(msk); dfrag = mptcp_rtx_head(sk); if (!dfrag) { if (mptcp_data_fin_enabled(msk)) { @@ -2365,10 +2406,12 @@ static void __mptcp_retrans(struct sock *sk) goto reset_timer; } - return; + if (!mptcp_send_head(sk)) + return; + + goto reset_timer; } - ssk = mptcp_subflow_get_retrans(msk); if (!ssk) goto reset_timer; @@ -2395,6 +2438,8 @@ static void __mptcp_retrans(struct sock *sk) release_sock(ssk); reset_timer: + mptcp_check_and_set_pending(sk); + if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); } @@ -2461,7 +2506,6 @@ static int __mptcp_init_sock(struct sock *sk) msk->first_pending = NULL; msk->wmem_reserved = 0; WRITE_ONCE(msk->rmem_released, 0); - msk->tx_pending_data = 0; msk->timer_ival = TCP_RTO_MIN; msk->first = NULL; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index dc984676c5eb..284fdcec067e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -254,7 +254,6 @@ struct mptcp_sock { struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; struct sk_buff_head receive_queue; - int tx_pending_data; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; @@ -737,9 +736,6 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8c03afac5ca0..0f1e661c2032 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -14,6 +14,8 @@ #include <net/mptcp.h> #include "protocol.h" +#define MIN_INFO_OPTLEN_SIZE 16 + static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); @@ -670,6 +672,266 @@ out: return ret; } +void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) +{ + struct sock *sk = &msk->sk.icsk_inet.sk; + u32 flags = 0; + bool slow; + u8 val; + + memset(info, 0, sizeof(*info)); + + slow = lock_sock_fast(sk); + + info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); + info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); + info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); + info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); + info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); + val = mptcp_pm_get_add_addr_signal_max(msk); + info->mptcpi_add_addr_signal_max = val; + val = mptcp_pm_get_add_addr_accept_max(msk); + info->mptcpi_add_addr_accepted_max = val; + info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); + if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) + flags |= MPTCP_INFO_FLAG_FALLBACK; + if (READ_ONCE(msk->can_ack)) + flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; + info->mptcpi_flags = flags; + info->mptcpi_token = READ_ONCE(msk->token); + info->mptcpi_write_seq = READ_ONCE(msk->write_seq); + info->mptcpi_snd_una = READ_ONCE(msk->snd_una); + info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); + info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); + + unlock_sock_fast(sk, slow); +} +EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); + +static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) +{ + struct mptcp_info m_info; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(struct mptcp_info)); + + mptcp_diag_fill_info(msk, &m_info); + + if (put_user(len, optlen)) + return -EFAULT; + + if (copy_to_user(optval, &m_info, len)) + return -EFAULT; + + return 0; +} + +static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, + char __user *optval, + u32 copied, + int __user *optlen) +{ + u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); + + if (copied) + copied += sfd->size_subflow_data; + else + copied = copylen; + + if (put_user(copied, optlen)) + return -EFAULT; + + if (copy_to_user(optval, sfd, copylen)) + return -EFAULT; + + return 0; +} + +static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, + char __user *optval, int __user *optlen) +{ + int len, copylen; + + if (get_user(len, optlen)) + return -EFAULT; + + /* if mptcp_subflow_data size is changed, need to adjust + * this function to deal with programs using old version. + */ + BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); + + if (len < MIN_INFO_OPTLEN_SIZE) + return -EINVAL; + + memset(sfd, 0, sizeof(*sfd)); + + copylen = min_t(unsigned int, len, sizeof(*sfd)); + if (copy_from_user(sfd, optval, copylen)) + return -EFAULT; + + /* size_subflow_data is u32, but len is signed */ + if (sfd->size_subflow_data > INT_MAX || + sfd->size_user > INT_MAX) + return -EINVAL; + + if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || + sfd->size_subflow_data > len) + return -EINVAL; + + if (sfd->num_subflows || sfd->size_kernel) + return -EINVAL; + + return len - sfd->size_subflow_data; +} + +static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, + int __user *optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = &msk->sk.icsk_inet.sk; + unsigned int sfcount = 0, copied = 0; + struct mptcp_subflow_data sfd; + char __user *infoptr; + int len; + + len = mptcp_get_subflow_data(&sfd, optval, optlen); + if (len < 0) + return len; + + sfd.size_kernel = sizeof(struct tcp_info); + sfd.size_user = min_t(unsigned int, sfd.size_user, + sizeof(struct tcp_info)); + + infoptr = optval + sfd.size_subflow_data; + + lock_sock(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + ++sfcount; + + if (len && len >= sfd.size_user) { + struct tcp_info info; + + tcp_get_info(ssk, &info); + + if (copy_to_user(infoptr, &info, sfd.size_user)) { + release_sock(sk); + return -EFAULT; + } + + infoptr += sfd.size_user; + copied += sfd.size_user; + len -= sfd.size_user; + } + } + + release_sock(sk); + + sfd.num_subflows = sfcount; + + if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) + return -EFAULT; + + return 0; +} + +static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) +{ + struct inet_sock *inet = inet_sk(sk); + + memset(a, 0, sizeof(*a)); + + if (sk->sk_family == AF_INET) { + a->sin_local.sin_family = AF_INET; + a->sin_local.sin_port = inet->inet_sport; + a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; + + if (!a->sin_local.sin_addr.s_addr) + a->sin_local.sin_addr.s_addr = inet->inet_saddr; + + a->sin_remote.sin_family = AF_INET; + a->sin_remote.sin_port = inet->inet_dport; + a->sin_remote.sin_addr.s_addr = inet->inet_daddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + if (WARN_ON_ONCE(!np)) + return; + + a->sin6_local.sin6_family = AF_INET6; + a->sin6_local.sin6_port = inet->inet_sport; + + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + a->sin6_local.sin6_addr = np->saddr; + else + a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; + + a->sin6_remote.sin6_family = AF_INET6; + a->sin6_remote.sin6_port = inet->inet_dport; + a->sin6_remote.sin6_addr = sk->sk_v6_daddr; +#endif + } +} + +static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, + int __user *optlen) +{ + struct sock *sk = &msk->sk.icsk_inet.sk; + struct mptcp_subflow_context *subflow; + unsigned int sfcount = 0, copied = 0; + struct mptcp_subflow_data sfd; + char __user *addrptr; + int len; + + len = mptcp_get_subflow_data(&sfd, optval, optlen); + if (len < 0) + return len; + + sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); + sfd.size_user = min_t(unsigned int, sfd.size_user, + sizeof(struct mptcp_subflow_addrs)); + + addrptr = optval + sfd.size_subflow_data; + + lock_sock(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + ++sfcount; + + if (len && len >= sfd.size_user) { + struct mptcp_subflow_addrs a; + + mptcp_get_sub_addrs(ssk, &a); + + if (copy_to_user(addrptr, &a, sfd.size_user)) { + release_sock(sk); + return -EFAULT; + } + + addrptr += sfd.size_user; + copied += sfd.size_user; + len -= sfd.size_user; + } + } + + release_sock(sk); + + sfd.num_subflows = sfcount; + + if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) + return -EFAULT; + + return 0; +} + static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { @@ -684,6 +946,21 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; } +static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, + char __user *optval, int __user *optlen) +{ + switch (optname) { + case MPTCP_INFO: + return mptcp_getsockopt_info(msk, optval, optlen); + case MPTCP_TCPINFO: + return mptcp_getsockopt_tcpinfo(msk, optval, optlen); + case MPTCP_SUBFLOW_ADDRS: + return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); + } + + return -EOPNOTSUPP; +} + int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option) { @@ -706,6 +983,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, if (level == SOL_TCP) return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); + if (level == SOL_MPTCP) + return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); return -EOPNOTSUPP; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 92a747896f80..3646fc195e7d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -10,6 +10,17 @@ config NETFILTER_INGRESS This allows you to classify packets from ingress using the Netfilter infrastructure. +config NETFILTER_EGRESS + bool "Netfilter egress support" + default y + select NET_EGRESS + help + This allows you to classify packets before transmission using the + Netfilter infrastructure. + +config NETFILTER_SKIP_EGRESS + def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB) + config NETFILTER_NETLINK tristate diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 63d032191e62..6dec9cd395f1 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -317,6 +317,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, return &dev->nf_hooks_ingress; } #endif +#ifdef CONFIG_NETFILTER_EGRESS + if (hooknum == NF_NETDEV_EGRESS) { + if (dev && dev_net(dev) == net) + return &dev->nf_hooks_egress; + } +#endif WARN_ON_ONCE(1); return NULL; } @@ -335,7 +341,8 @@ static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg, return 0; } -static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) +static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg, + int pf) { if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) || (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS)) @@ -344,6 +351,12 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) return false; } +static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg, + int pf) +{ + return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS; +} + static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf) { #ifdef CONFIG_JUMP_LABEL @@ -383,9 +396,18 @@ static int __nf_register_net_hook(struct net *net, int pf, switch (pf) { case NFPROTO_NETDEV: - err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS); - if (err < 0) - return err; +#ifndef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) + return -EOPNOTSUPP; +#endif +#ifndef CONFIG_NETFILTER_EGRESS + if (reg->hooknum == NF_NETDEV_EGRESS) + return -EOPNOTSUPP; +#endif + if ((reg->hooknum != NF_NETDEV_INGRESS && + reg->hooknum != NF_NETDEV_EGRESS) || + !reg->dev || dev_net(reg->dev) != net) + return -EINVAL; break; case NFPROTO_INET: if (reg->hooknum != NF_INET_INGRESS) @@ -418,6 +440,10 @@ static int __nf_register_net_hook(struct net *net, int pf, if (nf_ingress_hook(reg, pf)) net_inc_ingress_queue(); #endif +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_egress_hook(reg, pf)) + net_inc_egress_queue(); +#endif nf_static_key_inc(reg, pf); BUG_ON(p == new_hooks); @@ -475,6 +501,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf, if (nf_ingress_hook(reg, pf)) net_dec_ingress_queue(); #endif +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_egress_hook(reg, pf)) + net_dec_egress_queue(); +#endif nf_static_key_dec(reg, pf); } else { WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 128690c512df..e93c937a8bf0 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1330,12 +1330,15 @@ drop: * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct netns_ipvs *ipvs = net_ipvs(state->net); + unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + int af = state->pf; struct sock *sk; EnterFunction(11); @@ -1468,56 +1471,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in return NF_ACCEPT; } -/* - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, - * used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_reply4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -/* - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_local_reply4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -#ifdef CONFIG_IP_VS_IPV6 - -/* - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, - * used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_reply6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -/* - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_local_reply6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -#endif - static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -1957,8 +1910,10 @@ out: * and send it on its way... */ static unsigned int -ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct netns_ipvs *ipvs = net_ipvs(state->net); + unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; @@ -1966,6 +1921,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int int ret, pkts; int conn_reuse_mode; struct sock *sk; + int af = state->pf; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -2138,55 +2094,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int } /* - * AF_INET handler in NF_INET_LOCAL_IN chain - * Schedule and forward packets from remote clients - */ -static unsigned int -ip_vs_remote_request4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -/* - * AF_INET handler in NF_INET_LOCAL_OUT chain - * Schedule and forward packets from local clients - */ -static unsigned int -ip_vs_local_request4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -#ifdef CONFIG_IP_VS_IPV6 - -/* - * AF_INET6 handler in NF_INET_LOCAL_IN chain - * Schedule and forward packets from remote clients - */ -static unsigned int -ip_vs_remote_request6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -/* - * AF_INET6 handler in NF_INET_LOCAL_OUT chain - * Schedule and forward packets from local clients - */ -static unsigned int -ip_vs_local_request6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -#endif - - -/* * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP * related packets destined for 0.0.0.0/0. * When fwmark-based virtual service is used, such as transparent @@ -2199,45 +2106,36 @@ static unsigned int ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - int r; struct netns_ipvs *ipvs = net_ipvs(state->net); - - if (ip_hdr(skb)->protocol != IPPROTO_ICMP) - return NF_ACCEPT; + int r; /* ipvs enabled in this netns ? */ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(ipvs, skb, &r, state->hook); -} - + if (state->pf == NFPROTO_IPV4) { + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) + return NF_ACCEPT; #ifdef CONFIG_IP_VS_IPV6 -static unsigned int -ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - int r; - struct netns_ipvs *ipvs = net_ipvs(state->net); - struct ip_vs_iphdr iphdr; + } else { + struct ip_vs_iphdr iphdr; - ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); - if (iphdr.protocol != IPPROTO_ICMPV6) - return NF_ACCEPT; + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); - /* ipvs enabled in this netns ? */ - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) - return NF_ACCEPT; + if (iphdr.protocol != IPPROTO_ICMPV6) + return NF_ACCEPT; - return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); -} + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); #endif + } + return ip_vs_in_icmp(ipvs, skb, &r, state->hook); +} static const struct nf_hook_ops ip_vs_ops4[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, @@ -2246,21 +2144,21 @@ static const struct nf_hook_ops ip_vs_ops4[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { - .hook = ip_vs_remote_request4, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_local_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_local_request4, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, @@ -2275,7 +2173,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -2286,7 +2184,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, @@ -2295,21 +2193,21 @@ static const struct nf_hook_ops ip_vs_ops6[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { - .hook = ip_vs_remote_request6, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_local_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_local_request6, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, @@ -2317,14 +2215,14 @@ static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { - .hook = ip_vs_forward_icmp_v6, + .hook = ip_vs_forward_icmp, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 29ec3ef63edc..e62b40bd349e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2017,6 +2017,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "run_estimation", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -4090,6 +4096,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; + ipvs->sysctl_run_estimation = 1; + tbl[idx++].data = &ipvs->sysctl_run_estimation; #ifdef CONFIG_IP_VS_DEBUG /* Global sysctls must be ro in non-init netns */ if (!net_eq(net, &init_net)) diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 05b8112ffb37..9a1a7af6a186 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -100,6 +100,9 @@ static void estimation_timer(struct timer_list *t) u64 rate; struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); + if (!sysctl_run_estimation(ipvs)) + goto skip; + spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); @@ -131,6 +134,8 @@ static void estimation_timer(struct timer_list *t) spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); + +skip: mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index f554e2ea32ee..d5c719c9e36c 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -185,7 +185,7 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; -#ifdef CONFIG_NETFILTER_INGRESS +#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) struct net_device *netdev; #endif @@ -221,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); break; #endif -#ifdef CONFIG_NETFILTER_INGRESS +#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) case NFPROTO_NETDEV: - if (hook != NF_NETDEV_INGRESS) + if (hook >= NF_NETDEV_NUMHOOKS) return ERR_PTR(-EOPNOTSUPP); if (!dev) @@ -233,7 +233,15 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de if (!netdev) return ERR_PTR(-ENODEV); - return rcu_dereference(netdev->nf_hooks_ingress); +#ifdef CONFIG_NETFILTER_INGRESS + if (hook == NF_NETDEV_INGRESS) + return rcu_dereference(netdev->nf_hooks_ingress); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + if (hook == NF_NETDEV_EGRESS) + return rcu_dereference(netdev->nf_hooks_egress); +#endif + fallthrough; #endif default: return ERR_PTR(-EPROTONOSUPPORT); diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 3ced0eb6b7c3..c3563f0be269 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -310,9 +310,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_NETDEV, - .hook_mask = (1 << NF_NETDEV_INGRESS), + .hook_mask = (1 << NF_NETDEV_INGRESS) | + (1 << NF_NETDEV_EGRESS), .hooks = { [NF_NETDEV_INGRESS] = nft_do_chain_netdev, + [NF_NETDEV_EGRESS] = nft_do_chain_netdev, }, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 6ba3256fa844..87f3af4645d9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -198,17 +198,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return -EBUSY; priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); - switch (priv->op) { - case NFT_DYNSET_OP_ADD: - case NFT_DYNSET_OP_DELETE: - break; - case NFT_DYNSET_OP_UPDATE: - if (!(set->flags & NFT_SET_TIMEOUT)) - return -EOPNOTSUPP; - break; - default: + if (priv->op > NFT_DYNSET_OP_DELETE) return -EOPNOTSUPP; - } timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 0d5c422f8745..8aec1b529364 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -94,11 +94,11 @@ static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_rateest_target_info *info = par->targinfo; - struct gnet_stats_basic_packed *stats = &info->est->bstats; + struct gnet_stats_basic_sync *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); - stats->bytes += skb->len; - stats->packets++; + u64_stats_add(&stats->bytes, skb->len); + u64_stats_inc(&stats->packets); spin_unlock_bh(&info->est->lock); return XT_CONTINUE; @@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) if (!est) goto err1; + gnet_stats_basic_sync_init(&est->bstats); strlcpy(est->name, info->name, sizeof(est->name)); spin_lock_init(&est->lock); est->refcnt = 1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ada47e59647a..4c575324a985 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1412,8 +1412,6 @@ struct netlink_broadcast_data { int delivered; gfp_t allocation; struct sk_buff *skb, *skb2; - int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data); - void *tx_data; }; static void do_one_broadcast(struct sock *sk, @@ -1467,11 +1465,6 @@ static void do_one_broadcast(struct sock *sk, p->delivery_failure = 1; goto out; } - if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { - kfree_skb(p->skb2); - p->skb2 = NULL; - goto out; - } if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; @@ -1494,10 +1487,8 @@ out: sock_put(sk); } -int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, - u32 group, gfp_t allocation, - int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), - void *filter_data) +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, + u32 group, gfp_t allocation) { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; @@ -1516,8 +1507,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid info.allocation = allocation; info.skb = skb; info.skb2 = NULL; - info.tx_filter = filter; - info.tx_data = filter_data; /* While we sleep in clone, do not allow to change socket list */ @@ -1543,14 +1532,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid } return -ESRCH; } -EXPORT_SYMBOL(netlink_broadcast_filtered); - -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, - u32 group, gfp_t allocation) -{ - return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, - NULL, NULL); -} EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 6d16e1ab1a8a..775064cdd0ee 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -633,7 +633,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; - ax25_address *source = NULL; + const ax25_address *source = NULL; ax25_uid_assoc *user; struct net_device *dev; int err = 0; @@ -673,7 +673,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENETUNREACH; goto out_release; } - source = (ax25_address *)dev->dev_addr; + source = (const ax25_address *)dev->dev_addr; user = ax25_findbyuid(current_euid()); if (user) { diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 29e418c8c6c3..3aaac4a22b38 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -108,10 +108,10 @@ static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) if (err) return err; - ax25_listen_release((ax25_address *)dev->dev_addr, NULL); + ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); } - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } @@ -120,7 +120,7 @@ static int nr_open(struct net_device *dev) { int err; - err = ax25_listen_register((ax25_address *)dev->dev_addr, NULL); + err = ax25_listen_register((const ax25_address *)dev->dev_addr, NULL); if (err) return err; @@ -131,7 +131,7 @@ static int nr_open(struct net_device *dev) static int nr_close(struct net_device *dev) { - ax25_listen_release((ax25_address *)dev->dev_addr, NULL); + ax25_listen_release((const ax25_address *)dev->dev_addr, NULL); netif_stop_queue(dev); return 0; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index ddd5cbd455e3..baea3cbd76ca 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -598,7 +598,7 @@ struct net_device *nr_dev_get(ax25_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && - ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { + ax25cmp(addr, (const ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } @@ -825,7 +825,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) ax25s = nr_neigh->ax25; nr_neigh->ax25 = ax25_send_frame(skb, 256, - (ax25_address *)dev->dev_addr, + (const ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); if (ax25s) diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 3a89bd9b89fc..af6bacb3ba98 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -114,8 +114,6 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, { u8 pipe; - pr_debug("\n"); - pipe = hdev->gate2pipe[gate]; if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -130,8 +128,6 @@ int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, { u8 pipe; - pr_debug("\n"); - pipe = hdev->gate2pipe[gate]; if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -205,8 +201,6 @@ static int nfc_hci_open_pipe(struct nfc_hci_dev *hdev, u8 pipe) static int nfc_hci_close_pipe(struct nfc_hci_dev *hdev, u8 pipe) { - pr_debug("\n"); - return nfc_hci_execute_cmd(hdev, pipe, NFC_HCI_ANY_CLOSE_PIPE, NULL, 0, NULL); } @@ -242,8 +236,6 @@ static u8 nfc_hci_create_pipe(struct nfc_hci_dev *hdev, u8 dest_host, static int nfc_hci_delete_pipe(struct nfc_hci_dev *hdev, u8 pipe) { - pr_debug("\n"); - return nfc_hci_execute_cmd(hdev, NFC_HCI_ADMIN_PIPE, NFC_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } @@ -256,8 +248,6 @@ static int nfc_hci_clear_all_pipes(struct nfc_hci_dev *hdev) /* TODO: Find out what the identity reference data is * and fill param with it. HCI spec 6.1.3.5 */ - pr_debug("\n"); - if (test_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &hdev->quirks)) param_len = 0; @@ -271,8 +261,6 @@ int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate) int r; u8 pipe = hdev->gate2pipe[gate]; - pr_debug("\n"); - if (pipe == NFC_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; @@ -296,8 +284,6 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) { int r; - pr_debug("\n"); - r = nfc_hci_clear_all_pipes(hdev); if (r < 0) return r; @@ -314,8 +300,6 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, bool pipe_created = false; int r; - pr_debug("\n"); - if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index aef750d7787c..e90f70385813 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -201,8 +201,7 @@ static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr) del_timer_sync(&shdlc->t2_timer); shdlc->t2_active = false; - pr_debug - ("All sent frames acked. Stopped T2(retransmit)\n"); + pr_debug("All sent frames acked. Stopped T2(retransmit)\n"); } } else { skb = skb_peek(&shdlc->ack_pending_q); @@ -211,8 +210,7 @@ static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr) msecs_to_jiffies(SHDLC_T2_VALUE_MS)); shdlc->t2_active = true; - pr_debug - ("Start T2(retransmit) for remaining unacked sent frames\n"); + pr_debug("Start T2(retransmit) for remaining unacked sent frames\n"); } } @@ -365,8 +363,6 @@ static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc) { struct sk_buff *skb; - pr_debug("\n"); - skb = llc_shdlc_alloc_skb(shdlc, 2); if (skb == NULL) return -ENOMEM; @@ -381,8 +377,6 @@ static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc) { struct sk_buff *skb; - pr_debug("\n"); - skb = llc_shdlc_alloc_skb(shdlc, 0); if (skb == NULL) return -ENOMEM; @@ -522,12 +516,11 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) unsigned long time_sent; if (shdlc->send_q.qlen) - pr_debug - ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n", - shdlc->send_q.qlen, shdlc->ns, shdlc->dnr, - shdlc->rnr == false ? "false" : "true", - shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr), - shdlc->ack_pending_q.qlen); + pr_debug("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n", + shdlc->send_q.qlen, shdlc->ns, shdlc->dnr, + shdlc->rnr == false ? "false" : "true", + shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr), + shdlc->ack_pending_q.qlen); while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w && (shdlc->rnr == false)) { @@ -573,8 +566,6 @@ static void llc_shdlc_connect_timeout(struct timer_list *t) { struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer); - pr_debug("\n"); - schedule_work(&shdlc->sm_work); } @@ -601,8 +592,6 @@ static void llc_shdlc_sm_work(struct work_struct *work) struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work); int r; - pr_debug("\n"); - mutex_lock(&shdlc->state_mutex); switch (shdlc->state) { @@ -649,8 +638,7 @@ static void llc_shdlc_sm_work(struct work_struct *work) llc_shdlc_handle_send_queue(shdlc); if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) { - pr_debug - ("Handle T1(send ack) elapsed (T1 now inactive)\n"); + pr_debug("Handle T1(send ack) elapsed (T1 now inactive)\n"); shdlc->t1_active = false; r = llc_shdlc_send_s_frame(shdlc, S_FRAME_RR, @@ -660,8 +648,7 @@ static void llc_shdlc_sm_work(struct work_struct *work) } if (shdlc->t2_active && timer_pending(&shdlc->t2_timer) == 0) { - pr_debug - ("Handle T2(retransmit) elapsed (T2 inactive)\n"); + pr_debug("Handle T2(retransmit) elapsed (T2 inactive)\n"); shdlc->t2_active = false; @@ -686,8 +673,6 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq); - pr_debug("\n"); - mutex_lock(&shdlc->state_mutex); shdlc->state = SHDLC_CONNECTING; @@ -706,8 +691,6 @@ static int llc_shdlc_connect(struct llc_shdlc *shdlc) static void llc_shdlc_disconnect(struct llc_shdlc *shdlc) { - pr_debug("\n"); - mutex_lock(&shdlc->state_mutex); shdlc->state = SHDLC_DISCONNECTED; diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 3c4172a5aeb5..41e3a20c8935 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -337,8 +337,6 @@ int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) struct nfc_dev *dev; struct nfc_llcp_local *local; - pr_debug("Sending DISC\n"); - local = sock->local; if (local == NULL) return -ENODEV; @@ -362,8 +360,6 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) struct nfc_llcp_local *local; u16 size = 0; - pr_debug("Sending SYMM\n"); - local = nfc_llcp_find_local(dev); if (local == NULL) return -ENODEV; @@ -399,8 +395,6 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) u16 size = 0; __be16 miux; - pr_debug("Sending CONNECT\n"); - local = sock->local; if (local == NULL) return -ENODEV; @@ -475,8 +469,6 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) u16 size = 0; __be16 miux; - pr_debug("Sending CC\n"); - local = sock->local; if (local == NULL) return -ENODEV; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index eaeb2b1cfa6a..5ad5157aa9c5 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -45,8 +45,6 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) struct nfc_llcp_local *local = sock->local; struct sk_buff *s, *tmp; - pr_debug("%p\n", &sock->sk); - skb_queue_purge(&sock->tx_queue); skb_queue_purge(&sock->tx_pending_queue); @@ -1505,9 +1503,8 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) { struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; - pr_debug("Received an LLCP PDU\n"); if (err < 0) { - pr_err("err %d\n", err); + pr_err("LLCP PDU receive err %d\n", err); return; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 82ab39d80726..6fd873aa86be 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -930,8 +930,6 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; - pr_debug("entry\n"); - if (!ndev->target_active_prot) { pr_err("unable to deactivate target, no active target\n"); return; @@ -977,8 +975,6 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; - pr_debug("entry\n"); - if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index e199912ee1e5..19703a649b5a 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -432,8 +432,6 @@ void nci_hci_data_received_cb(void *context, struct sk_buff *frag_skb; int msg_len; - pr_debug("\n"); - if (err) { nci_req_complete(ndev, err); return; @@ -547,8 +545,6 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) { - pr_debug("\n"); - return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index c5eacaac41ae..282c51051dcc 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -738,8 +738,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, const struct nci_nfcee_discover_ntf *nfcee_ntf = (struct nci_nfcee_discover_ntf *)skb->data; - pr_debug("\n"); - /* NFCForum NCI 9.2.1 HCI Network Specific Handling * If the NFCC supports the HCI Network, it SHALL return one, * and only one, NFCEE_DISCOVER_NTF with a Protocol type of @@ -751,12 +749,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, nci_req_complete(ndev, status); } -static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, - const struct sk_buff *skb) -{ - pr_debug("\n"); -} - void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -813,7 +805,6 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) break; case NCI_OP_RF_NFCEE_ACTION_NTF: - nci_nfcee_action_ntf_packet(ndev, skb); break; default: diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 502e7a3f8948..57500c262cc3 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2015, Marvell International Ltd. * - * This software file (the "File") is distributed by Marvell International - * Ltd. under the terms of the GNU General Public License Version 2, June 1991 - * (the "License"). You may use, redistribute and/or modify this File in - * accordance with the terms and conditions of the License, a copy of which - * is available on the worldwide web at - * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. - * - * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE - * ARE EXPRESSLY DISCLAIMED. The License provides additional details about - * this warranty disclaimer. - */ - -/* Inspired (hugely) by HCI LDISC implementation in Bluetooth. + * Inspired (hugely) by HCI LDISC implementation in Bluetooth. * * Copyright (C) 2000-2001 Qualcomm Incorporated * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2a2bc64f75cf..46943a18a10d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -91,6 +91,7 @@ #endif #include <linux/bpf.h> #include <net/compat.h> +#include <linux/netfilter_netdev.h> #include "internal.h" @@ -241,8 +242,42 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +#ifdef CONFIG_NETFILTER_EGRESS +static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) +{ + struct sk_buff *next, *head = NULL, *tail; + int rc; + + rcu_read_lock(); + for (; skb != NULL; skb = next) { + next = skb->next; + skb_mark_not_on_list(skb); + + if (!nf_hook_egress(skb, &rc, skb->dev)) + continue; + + if (!head) + head = skb; + else + tail->next = skb; + + tail = skb; + } + rcu_read_unlock(); + + return head; +} +#endif + static int packet_direct_xmit(struct sk_buff *skb) { +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_hook_egress_active()) { + skb = nf_hook_direct_egress(skb); + if (!skb) + return NET_XMIT_DROP; + } +#endif return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); } diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile index 1b1411d158a7..8e0605f88a73 100644 --- a/net/qrtr/Makefile +++ b/net/qrtr/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_QRTR) := qrtr.o ns.o +obj-$(CONFIG_QRTR) += qrtr.o +qrtr-y := af_qrtr.o ns.o obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o qrtr-smd-y := smd.o diff --git a/net/qrtr/qrtr.c b/net/qrtr/af_qrtr.c index ec2322529727..ec2322529727 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/af_qrtr.c diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index cf7d974e0f61..30a1cf4c16c6 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -109,7 +109,7 @@ char *rose2asc(char *buf, const rose_address *addr) /* * Compare two ROSE addresses, 0 == equal. */ -int rosecmp(rose_address *addr1, rose_address *addr2) +int rosecmp(const rose_address *addr1, const rose_address *addr2) { int i; @@ -123,7 +123,8 @@ int rosecmp(rose_address *addr1, rose_address *addr2) /* * Compare two ROSE addresses for only mask digits, 0 == equal. */ -int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask) +int rosecmpm(const rose_address *addr1, const rose_address *addr2, + unsigned short mask) { unsigned int i, j; diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 051804fbee17..f1a76a5820f1 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -66,10 +66,10 @@ static int rose_set_mac_address(struct net_device *dev, void *addr) if (err) return err; - rose_del_loopback_node((rose_address *)dev->dev_addr); + rose_del_loopback_node((const rose_address *)dev->dev_addr); } - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + dev_addr_set(dev, sa->sa_data); return 0; } @@ -78,7 +78,7 @@ static int rose_open(struct net_device *dev) { int err; - err = rose_add_loopback_node((rose_address *)dev->dev_addr); + err = rose_add_loopback_node((const rose_address *)dev->dev_addr); if (err) return err; @@ -90,7 +90,7 @@ static int rose_open(struct net_device *dev) static int rose_close(struct net_device *dev) { netif_stop_queue(dev); - rose_del_loopback_node((rose_address *)dev->dev_addr); + rose_del_loopback_node((const rose_address *)dev->dev_addr); return 0; } diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index f6102e6f5161..8b96a56d3a49 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -94,11 +94,11 @@ static void rose_t0timer_expiry(struct timer_list *t) */ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) { - ax25_address *rose_call; + const ax25_address *rose_call; ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (ax25_address *)neigh->dev->dev_addr; + rose_call = (const ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; @@ -117,11 +117,11 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) */ static int rose_link_up(struct rose_neigh *neigh) { - ax25_address *rose_call; + const ax25_address *rose_call; ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) - rose_call = (ax25_address *)neigh->dev->dev_addr; + rose_call = (const ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index c0e04c261a15..e2e6b6b78578 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -401,7 +401,7 @@ void rose_add_loopback_neigh(void) /* * Add a loopback node. */ -int rose_add_loopback_node(rose_address *address) +int rose_add_loopback_node(const rose_address *address) { struct rose_node *rose_node; int err = 0; @@ -446,7 +446,7 @@ out: /* * Delete a loopback node. */ -void rose_del_loopback_node(rose_address *address) +void rose_del_loopback_node(const rose_address *address) { struct rose_node *rose_node; @@ -629,7 +629,8 @@ struct net_device *rose_dev_get(rose_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && + rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } @@ -646,7 +647,8 @@ static int rose_dev_exists(rose_address *addr) rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && + rosecmp(addr, (const rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index 4e565eeab426..be61d6f5be8d 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) { - return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); + return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); } static u32 rxrpc_bound_rto(u32 rto) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 7dd3a2dc5fa4..3258da3d5bed 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -480,16 +480,18 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, atomic_set(&p->tcfa_bindcnt, 1); if (cpustats) { - p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats) goto err1; - p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats_hw) goto err2; p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!p->cpu_qstats) goto err3; } + gnet_stats_basic_sync_init(&p->tcfa_bstats); + gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; p->tcfa_tm.install = jiffies; @@ -499,7 +501,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, - &p->tcfa_lock, NULL, est); + &p->tcfa_lock, false, est); if (err) goto err4; } @@ -1126,13 +1128,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw) { if (a->cpu_bstats) { - _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); this_cpu_ptr(a->cpu_qstats)->drops += drops; if (hw) - _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), - bytes, packets); + _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); return; } @@ -1171,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, if (err < 0) goto errout; - if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || - gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw, - &p->tcfa_bstats_hw) < 0 || + if (gnet_stats_copy_basic(&d, p->cpu_bstats, + &p->tcfa_bstats, false) < 0 || + gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, + &p->tcfa_bstats_hw, false) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfa_qstats, diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 5c36013339e1..f2bf896331a5 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, int action, filter_res; tcf_lastuse_update(&prog->tcf_tm); - bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb); filter = rcu_dereference(prog->filter); if (at_ingress) { diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 7064a365a1a9..b757f90a2d58 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlv_data; u16 metalen; - bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (skb_at_tc_ingress(skb)) @@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, exceed_mtu = true; } - bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index e4529b428cf4..8faa4c58305e 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, int ret, mac_len; tcf_lastuse_update(&m->tcf_tm); - bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb); /* Ensure 'data' points at mac_header prior calling mpls manipulating * functions. diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 832157a840fc..9e77ba8401e5 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, police->common.cpu_bstats, &police->tcf_rate_est, &police->tcf_lock, - NULL, est); + false, est); if (err) goto failure; } else if (tb[TCA_POLICE_AVRATE] && @@ -248,7 +248,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, int ret; tcf_lastuse_update(&police->tcf_tm); - bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb); ret = READ_ONCE(police->tcf_action); p = rcu_dereference_bh(police->params); diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 230501eb9e06..ce859b0e0deb 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, int retval; tcf_lastuse_update(&s->tcf_tm); - bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb); retval = READ_ONCE(s->tcf_action); psample_group = rcu_dereference_bh(s->psample_group); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index cbbe1861d3a2..e617ab4505ca 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a, * then it would look like "hello_3" (without quotes) */ pr_info("simple: %s_%llu\n", - (char *)d->tcfd_defdata, d->tcf_bstats.packets); + (char *)d->tcfd_defdata, + u64_stats_read(&d->tcf_bstats.packets)); spin_unlock(&d->tcf_lock); return d->tcf_action; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 605418538347..d30ecbfc8f84 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, int action; tcf_lastuse_update(&d->tcf_tm); - bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); params = rcu_dereference_bh(d->params); action = READ_ONCE(d->tcf_action); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index ecb9ee666095..9b6b52c5e24e 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, u64 flags; tcf_lastuse_update(&d->tcf_tm); - bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); action = READ_ONCE(d->tcf_action); if (unlikely(action == TC_ACT_SHOT)) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 12f39a2dffd4..efcd0b5e9a32 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -507,7 +507,8 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) continue; - if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) + if (tsize > 0 && + memcmp(stab->data, tab, flex_array_size(stab, data, tsize))) continue; stab->refcnt++; return stab; @@ -519,14 +520,14 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, return ERR_PTR(-EINVAL); } - stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); + stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); stab->refcnt = 1; stab->szopts = *s; if (tsize > 0) - memcpy(stab->data, tab, tsize * sizeof(u16)); + memcpy(stab->data, tab, flex_array_size(stab, data, tsize)); list_add_tail(&stab->list, &qdisc_stab_list); @@ -884,7 +885,7 @@ static void qdisc_offload_graft_root(struct net_device *dev, static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event) { - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; + struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; struct gnet_stats_queue __percpu *cpu_qstats = NULL; struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -942,8 +943,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, cpu_qstats = q->cpu_qstats; } - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), - &d, cpu_bstats, &q->bstats) < 0 || + if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; @@ -1264,26 +1264,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { - seqcount_t *running; - err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); goto err_out4; } - if (sch->parent != TC_H_ROOT && - !(sch->flags & TCQ_F_INGRESS) && - (!p || !(p->flags & TCQ_F_MQROOT))) - running = qdisc_root_sleeping_running(sch); - else - running = &sch->running; - err = gen_new_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, NULL, - running, + true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); @@ -1359,7 +1350,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, sch->cpu_bstats, &sch->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); } out: diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 7d8518176b45..4c8e994cf0a5 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -52,7 +52,7 @@ struct atm_flow_data { struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ int ref; /* reference count */ - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct list_head list; struct atm_flow_data *excess; /* flow for excess traffic; @@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); INIT_LIST_HEAD(&p->flows); INIT_LIST_HEAD(&p->link.list); + gnet_stats_basic_sync_init(&p->link.bstats); list_add(&p->link.list, &p->flows); p->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, extack); @@ -652,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct atm_flow_data *flow = (struct atm_flow_data *)arg; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &flow->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index e0da15530f0e..02d9f0dfe356 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -116,7 +116,7 @@ struct cbq_class { long avgidle; long deficit; /* Saved deficit for WRR */ psched_time_t penalized; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tc_cbq_xstats xstats; @@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q) long avgidle = cl->avgidle; long idle; - cl->bstats.packets++; - cl->bstats.bytes += len; + _bstats_update(&cl->bstats, len, 1); /* * (now - last) is total time between packet right edges. @@ -1384,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; @@ -1519,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); @@ -1611,6 +1609,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; + gnet_stats_basic_sync_init(&cl->bstats); err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); @@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), - tca[TCA_RATE]); + NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); tcf_block_put(cl->block); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 642cd179b7a7..18e4f7a0b291 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -19,7 +19,7 @@ struct drr_class { struct Qdisc_class_common common; unsigned int filter_cnt; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct list_head alist; @@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), + NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); @@ -106,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -118,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), - tca[TCA_RATE]); + NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); qdisc_put(cl->qdisc); @@ -267,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (qlen) xstats.deficit = cl->deficit; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) return -1; diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 1f857ffd1ac2..0eae9ff5edf6 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -41,7 +41,7 @@ struct ets_class { struct Qdisc *qdisc; u32 quantum; u32 deficit; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; }; @@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg, struct ets_class *cl = ets_class_from_arg(sch, arg); struct Qdisc *cl_q = cl->qdisc; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; @@ -661,7 +660,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->nbands = nbands; for (i = nstrict; i < q->nstrict; i++) { - INIT_LIST_HEAD(&q->classes[i].alist); if (q->classes[i].qdisc->q.qlen) { list_add_tail(&q->classes[i].alist, &q->active); q->classes[i].deficit = quanta[i]; @@ -687,7 +685,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, ets_offload_change(sch); for (i = q->nbands; i < oldbands; i++) { qdisc_put(q->classes[i].qdisc); - memset(&q->classes[i], 0, sizeof(q->classes[i])); + q->classes[i].qdisc = NULL; + q->classes[i].quantum = 0; + q->classes[i].deficit = 0; + gnet_stats_basic_sync_init(&q->classes[i].bstats); + memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); } return 0; } @@ -696,7 +698,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct ets_sched *q = qdisc_priv(sch); - int err; + int err, i; if (!opt) return -EINVAL; @@ -706,6 +708,9 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt, return err; INIT_LIST_HEAD(&q->active); + for (i = 0; i < TCQ_ETS_MAX_BANDS; i++) + INIT_LIST_HEAD(&q->classes[i].alist); + return ets_qdisc_change(sch, opt, extack); } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index bb0cd6d3d2c2..839e1235db05 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -362,6 +362,8 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 }, [TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR] = { .type = NLA_U8 }, + [TCA_FQ_CODEL_CE_THRESHOLD_MASK] = { .type = NLA_U8 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, @@ -408,6 +410,11 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]) + q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]); + if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]) + q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]); + if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); @@ -544,10 +551,15 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->flows_cnt)) goto nla_put_failure; - if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD && - nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, - codel_time_to_us(q->cparams.ce_threshold))) - goto nla_put_failure; + if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) { + if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, + codel_time_to_us(q->cparams.ce_threshold))) + goto nla_put_failure; + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector)) + goto nla_put_failure; + if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask)) + goto nla_put_failure; + } return nla_nest_end(skb, opts); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a8dd06c74e31..b0ff0dff2773 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -304,8 +304,8 @@ trace: /* * Transmit possibly several skbs, and handle the return status as - * required. Owning running seqcount bit guarantees that - * only one CPU can execute this function. + * required. Owning qdisc running bit guarantees that only one CPU + * can execute this function. * * Returns to the caller: * false - hardware queue frozen backoff @@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, - .running = SEQCNT_ZERO(noop_qdisc.running), .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, @@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { EXPORT_SYMBOL(pfifo_fast_ops); static struct lock_class_key qdisc_tx_busylock; -static struct lock_class_key qdisc_running_key; struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, @@ -892,11 +890,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); qdisc_skb_head_init(&sch->q); + gnet_stats_basic_sync_init(&sch->bstats); spin_lock_init(&sch->q.lock); if (ops->static_flags & TCQ_F_CPUSTATS) { sch->cpu_bstats = - netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!sch->cpu_bstats) goto errout1; @@ -916,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); - seqcount_init(&sch->running); - lockdep_set_class(&sch->running, - dev->qdisc_running_key ?: &qdisc_running_key); - sch->ops = ops; sch->flags = ops->static_flags; sch->enqueue = ops->enqueue; @@ -1330,6 +1325,39 @@ static int qdisc_change_tx_queue_len(struct net_device *dev, return 0; } +void dev_qdisc_change_real_num_tx(struct net_device *dev, + unsigned int new_real_tx) +{ + struct Qdisc *qdisc = dev->qdisc; + + if (qdisc->ops->change_real_num_tx) + qdisc->ops->change_real_num_tx(qdisc, new_real_tx); +} + +void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) +{ +#ifdef CONFIG_NET_SCHED + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + unsigned int i; + + for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + /* Only update the default qdiscs we created, + * qdiscs with handles are always hashed. + */ + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_del(qdisc); + } + for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_add(qdisc, false); + } +#endif +} +EXPORT_SYMBOL(mq_change_real_num_tx); + int dev_qdisc_change_tx_queue_len(struct net_device *dev) { bool up = dev->flags & IFF_UP; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 621dc6afde8f..72de08ef8335 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -353,6 +353,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch) { struct gred_sched *table = qdisc_priv(sch); struct tc_gred_qopt_offload *hw_stats; + u64 bytes = 0, packets = 0; unsigned int i; int ret; @@ -364,9 +365,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch) hw_stats->handle = sch->handle; hw_stats->parent = sch->parent; - for (i = 0; i < MAX_DPs; i++) + for (i = 0; i < MAX_DPs; i++) { + gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]); if (table->tab[i]) hw_stats->stats.xstats[i] = &table->tab[i]->stats; + } ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); /* Even if driver returns failure adjust the stats - in case offload @@ -375,19 +378,19 @@ static int gred_offload_dump_stats(struct Qdisc *sch) for (i = 0; i < MAX_DPs; i++) { if (!table->tab[i]) continue; - table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets; - table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes; + table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets); + table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes); table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; - _bstats_update(&sch->bstats, - hw_stats->stats.bstats[i].bytes, - hw_stats->stats.bstats[i].packets); + bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes); + packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; sch->qstats.drops += hw_stats->stats.qstats[i].drops; sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; } + _bstats_update(&sch->bstats, bytes, packets); kfree(hw_stats); return ret; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b7ac30cca035..d3979a6000e7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -111,7 +111,7 @@ enum hfsc_class_flags { struct hfsc_class { struct Qdisc_class_common cl_common; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tcf_proto __rcu *filter_list; /* filter list */ @@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) return err; @@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, - qdisc_root_sleeping_running(sch), - tca[TCA_RATE]); + NULL, true, tca[TCA_RATE]); if (err) { tcf_block_put(cl->block); kfree(cl); @@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.work = cl->cl_total; xstats.rtwork = cl->cl_cumul; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; @@ -1406,6 +1404,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt, if (err) return err; + gnet_stats_basic_sync_init(&q->root.bstats); q->root.cl_common.classid = sch->handle; q->root.sched = q; q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5067a6e5d4fd..cf1d45db4e84 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -113,8 +113,8 @@ struct htb_class { /* * Written often fields */ - struct gnet_stats_basic_packed bstats; - struct gnet_stats_basic_packed bstats_bias; + struct gnet_stats_basic_sync bstats; + struct gnet_stats_basic_sync bstats_bias; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -1308,10 +1308,11 @@ nla_put_failure: static void htb_offload_aggregate_stats(struct htb_sched *q, struct htb_class *cl) { + u64 bytes = 0, packets = 0; struct htb_class *c; unsigned int i; - memset(&cl->bstats, 0, sizeof(cl->bstats)); + gnet_stats_basic_sync_init(&cl->bstats); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { @@ -1323,14 +1324,15 @@ static void htb_offload_aggregate_stats(struct htb_sched *q, if (p != cl) continue; - cl->bstats.bytes += c->bstats_bias.bytes; - cl->bstats.packets += c->bstats_bias.packets; + bytes += u64_stats_read(&c->bstats_bias.bytes); + packets += u64_stats_read(&c->bstats_bias.packets); if (c->level == 0) { - cl->bstats.bytes += c->leaf.q->bstats.bytes; - cl->bstats.packets += c->leaf.q->bstats.packets; + bytes += u64_stats_read(&c->leaf.q->bstats.bytes); + packets += u64_stats_read(&c->leaf.q->bstats.packets); } } } + _bstats_update(&cl->bstats, bytes, packets); } static int @@ -1357,16 +1359,16 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (cl->leaf.q) cl->bstats = cl->leaf.q->bstats; else - memset(&cl->bstats, 0, sizeof(cl->bstats)); - cl->bstats.bytes += cl->bstats_bias.bytes; - cl->bstats.packets += cl->bstats_bias.packets; + gnet_stats_basic_sync_init(&cl->bstats); + _bstats_update(&cl->bstats, + u64_stats_read(&cl->bstats_bias.bytes), + u64_stats_read(&cl->bstats_bias.packets)); } else { htb_offload_aggregate_stats(q, cl); } } - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) return -1; @@ -1578,8 +1580,9 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, WARN_ON(old != q); if (cl->parent) { - cl->parent->bstats_bias.bytes += q->bstats.bytes; - cl->parent->bstats_bias.packets += q->bstats.packets; + _bstats_update(&cl->parent->bstats_bias, + u64_stats_read(&q->bstats.bytes), + u64_stats_read(&q->bstats.packets)); } offload_opt = (struct tc_htb_qopt_offload) { @@ -1849,6 +1852,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; + gnet_stats_basic_sync_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats_bias); + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); @@ -1858,7 +1864,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE] ? : &est.nla); if (err) goto err_block_put; @@ -1922,8 +1928,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, htb_graft_helper(dev_queue, old_q); goto err_kill_estimator; } - parent->bstats_bias.bytes += old_q->bstats.bytes; - parent->bstats_bias.packets += old_q->bstats.packets; + _bstats_update(&parent->bstats_bias, + u64_stats_read(&old_q->bstats.bytes), + u64_stats_read(&old_q->bstats.packets)); qdisc_put(old_q); } new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, @@ -1983,7 +1990,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) return err; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index e79f1afe0cfd..83d2e54bf303 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -130,10 +130,9 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) struct net_device *dev = qdisc_dev(sch); struct Qdisc *qdisc; unsigned int ntx; - __u32 qlen = 0; sch->q.qlen = 0; - memset(&sch->bstats, 0, sizeof(sch->bstats)); + gnet_stats_basic_sync_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs @@ -145,25 +144,11 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - if (qdisc_is_percpu_stats(qdisc)) { - qlen = qdisc_qlen_sum(qdisc); - __gnet_stats_copy_basic(NULL, &sch->bstats, - qdisc->cpu_bstats, - &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - qdisc->cpu_qstats, - &qdisc->qstats, qlen); - sch->q.qlen += qlen; - } else { - sch->q.qlen += qdisc->q.qlen; - sch->bstats.bytes += qdisc->bstats.bytes; - sch->bstats.packets += qdisc->bstats.packets; - sch->qstats.qlen += qdisc->qstats.qlen; - sch->qstats.backlog += qdisc->qstats.backlog; - sch->qstats.drops += qdisc->qstats.drops; - sch->qstats.requeues += qdisc->qstats.requeues; - sch->qstats.overlimits += qdisc->qstats.overlimits; - } + gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); + gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } @@ -246,8 +231,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, - &sch->bstats) < 0 || + if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; return 0; @@ -288,6 +272,7 @@ struct Qdisc_ops mq_qdisc_ops __read_mostly = { .init = mq_init, .destroy = mq_destroy, .attach = mq_attach, + .change_real_num_tx = mq_change_real_num_tx, .dump = mq_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 5eb3b1b7ae5e..b29f3453c6ea 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned int ntx, tc; sch->q.qlen = 0; - memset(&sch->bstats, 0, sizeof(sch->bstats)); + gnet_stats_basic_sync_init(&sch->bstats); memset(&sch->qstats, 0, sizeof(sch->qstats)); /* MQ supports lockless qdiscs. However, statistics accounting needs @@ -402,25 +402,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; spin_lock_bh(qdisc_lock(qdisc)); - if (qdisc_is_percpu_stats(qdisc)) { - __u32 qlen = qdisc_qlen_sum(qdisc); - - __gnet_stats_copy_basic(NULL, &sch->bstats, - qdisc->cpu_bstats, - &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - qdisc->cpu_qstats, - &qdisc->qstats, qlen); - sch->q.qlen += qlen; - } else { - sch->q.qlen += qdisc->q.qlen; - sch->bstats.bytes += qdisc->bstats.bytes; - sch->bstats.packets += qdisc->bstats.packets; - sch->qstats.backlog += qdisc->qstats.backlog; - sch->qstats.drops += qdisc->qstats.drops; - sch->qstats.requeues += qdisc->qstats.requeues; - sch->qstats.overlimits += qdisc->qstats.overlimits; - } + gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); + gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); } @@ -512,12 +498,13 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, { if (cl >= TC_H_MIN_PRIORITY) { int i; - __u32 qlen = 0; + __u32 qlen; struct gnet_stats_queue qstats = {0}; - struct gnet_stats_basic_packed bstats = {0}; + struct gnet_stats_basic_sync bstats; struct net_device *dev = qdisc_dev(sch); struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; + gnet_stats_basic_sync_init(&bstats); /* Drop lock here it will be reclaimed before touching * statistics this is required because the d->lock we * hold here is the look on dev_queue->qdisc_sleeping @@ -532,40 +519,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_lock_bh(qdisc_lock(qdisc)); - if (qdisc_is_percpu_stats(qdisc)) { - qlen = qdisc_qlen_sum(qdisc); - - __gnet_stats_copy_basic(NULL, &bstats, - qdisc->cpu_bstats, - &qdisc->bstats); - __gnet_stats_copy_queue(&qstats, - qdisc->cpu_qstats, - &qdisc->qstats, - qlen); - } else { - qlen += qdisc->q.qlen; - bstats.bytes += qdisc->bstats.bytes; - bstats.packets += qdisc->bstats.packets; - qstats.backlog += qdisc->qstats.backlog; - qstats.drops += qdisc->qstats.drops; - qstats.requeues += qdisc->qstats.requeues; - qstats.overlimits += qdisc->qstats.overlimits; - } + gnet_stats_add_basic(&bstats, qdisc->cpu_bstats, + &qdisc->bstats, false); + gnet_stats_add_queue(&qstats, qdisc->cpu_qstats, + &qdisc->qstats); + sch->q.qlen += qdisc_qlen(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); } + qlen = qdisc_qlen(sch) + qstats.qlen; /* Reclaim root sleeping lock before completing stats */ if (d->lock) spin_lock_bh(d->lock); - if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 || gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0) return -1; } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, - sch->cpu_bstats, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(d, sch->cpu_bstats, + &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; } @@ -629,6 +604,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .init = mqprio_init, .destroy = mqprio_destroy, .attach = mqprio_attach, + .change_real_num_tx = mq_change_real_num_tx, .dump = mqprio_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index e282e7382117..cd8ab90c4765 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0c345e43a09a..ecbb10db1111 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -785,7 +785,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, if (!n || n > NETEM_DIST_MAX) return -EINVAL; - d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL); + d = kvmalloc(struct_size(d, table, n), GFP_KERNEL); if (!d) return -ENOMEM; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 03fdf31ccb6a..3b8d7197c06b 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, + &cl_q->bstats, true) < 0 || qdisc_qstats_copy(d, cl_q) < 0) return -1; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 58a9d42b52b8..0b7f9ba28deb 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -131,7 +131,7 @@ struct qfq_class { unsigned int filter_cnt; - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct Qdisc *qdisc; @@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) return err; @@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; @@ -477,7 +478,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, - qdisc_root_sleeping_running(sch), + true, tca[TCA_RATE]); if (err) goto destroy_class; @@ -639,8 +640,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; @@ -1234,8 +1234,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - cl->bstats.bytes += len; - cl->bstats.packets += gso_segs; + _bstats_update(&cl->bstats, len, gso_segs); sch->qstats.backlog += len; ++sch->q.qlen; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b9fd18d98646..9ab068fa2672 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1977,7 +1977,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; return 0; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 78e79029dc63..72102277449e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -184,6 +184,20 @@ static int tbf_offload_dump(struct Qdisc *sch) return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); } +static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new, + struct Qdisc *old, struct netlink_ext_ack *extack) +{ + struct tc_tbf_qopt_offload graft_offload = { + .handle = sch->handle, + .parent = sch->parent, + .child_handle = new->handle, + .command = TC_TBF_GRAFT, + }; + + qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old, + TC_SETUP_QDISC_TBF, &graft_offload, extack); +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -547,6 +561,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->qdisc); + + tbf_offload_graft(sch, new, *old, extack); return 0; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c038efc23ce3..5e50e007a7da 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -439,6 +439,47 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) return 0; } +static bool smc_isascii(char *hostname) +{ + int i; + + for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++) + if (!isascii(hostname[i])) + return false; + return true; +} + +static void smc_conn_save_peer_info_fce(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)clc; + struct smc_clc_first_contact_ext *fce; + int clc_v2_len; + + if (clc->hdr.version == SMC_V1 || + !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) + return; + + if (smc->conn.lgr->is_smcd) { + memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid, + SMC_MAX_EID_LEN); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, + d1); + } else { + memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid, + SMC_MAX_EID_LEN); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, + r1); + } + fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len); + smc->conn.lgr->peer_os = fce->os_type; + smc->conn.lgr->peer_smc_release = fce->release; + if (smc_isascii(fce->hostname)) + memcpy(smc->conn.lgr->peer_hostname, fce->hostname, + SMC_MAX_HOSTNAME_LEN); +} + static void smcr_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { @@ -451,16 +492,6 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc, smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } -static bool smc_isascii(char *hostname) -{ - int i; - - for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++) - if (!isascii(hostname[i])) - return false; - return true; -} - static void smcd_conn_save_peer_info(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { @@ -472,22 +503,6 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; - if (clc->hdr.version > SMC_V1 && - (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)clc; - struct smc_clc_first_contact_ext *fce = - (struct smc_clc_first_contact_ext *) - (((u8 *)clc_v2) + sizeof(*clc_v2)); - - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid, - SMC_MAX_EID_LEN); - smc->conn.lgr->peer_os = fce->os_type; - smc->conn.lgr->peer_smc_release = fce->release; - if (smc_isascii(fce->hostname)) - memcpy(smc->conn.lgr->peer_hostname, fce->hostname, - SMC_MAX_HOSTNAME_LEN); - } } static void smc_conn_save_peer_info(struct smc_sock *smc, @@ -497,14 +512,16 @@ static void smc_conn_save_peer_info(struct smc_sock *smc, smcd_conn_save_peer_info(smc, clc); else smcr_conn_save_peer_info(smc, clc); + smc_conn_save_peer_info_fce(smc, clc); } static void smc_link_save_peer_info(struct smc_link *link, - struct smc_clc_msg_accept_confirm *clc) + struct smc_clc_msg_accept_confirm *clc, + struct smc_init_info *ini) { link->peer_qpn = ntoh24(clc->r0.qpn); - memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE); - memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac)); + memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE); + memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac)); link->peer_psn = ntoh24(clc->r0.psn); link->peer_mtu = clc->r0.qp_mtu; } @@ -608,7 +625,9 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini) * used for the internal TCP socket */ smc_pnet_find_roce_resource(smc->clcsock->sk, ini); - if (!ini->ib_dev) + if (!ini->check_smcrv2 && !ini->ib_dev) + return SMC_CLC_DECL_NOSMCRDEV; + if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2) return SMC_CLC_DECL_NOSMCRDEV; return 0; } @@ -692,27 +711,42 @@ static int smc_find_proposal_devices(struct smc_sock *smc, int rc = 0; /* check if there is an ism device available */ - if (ini->smcd_version & SMC_V1) { - if (smc_find_ism_device(smc, ini) || - smc_connect_ism_vlan_setup(smc, ini)) { - if (ini->smc_type_v1 == SMC_TYPE_B) - ini->smc_type_v1 = SMC_TYPE_R; - else - ini->smc_type_v1 = SMC_TYPE_N; - } /* else ISM V1 is supported for this connection */ - if (smc_find_rdma_device(smc, ini)) { - if (ini->smc_type_v1 == SMC_TYPE_B) - ini->smc_type_v1 = SMC_TYPE_D; - else - ini->smc_type_v1 = SMC_TYPE_N; - } /* else RDMA is supported for this connection */ - } - if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini)) - ini->smc_type_v2 = SMC_TYPE_N; + if (!(ini->smcd_version & SMC_V1) || + smc_find_ism_device(smc, ini) || + smc_connect_ism_vlan_setup(smc, ini)) + ini->smcd_version &= ~SMC_V1; + /* else ISM V1 is supported for this connection */ + + /* check if there is an rdma device available */ + if (!(ini->smcr_version & SMC_V1) || + smc_find_rdma_device(smc, ini)) + ini->smcr_version &= ~SMC_V1; + /* else RDMA is supported for this connection */ + + ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1, + ini->smcr_version & SMC_V1); + + /* check if there is an ism v2 device available */ + if (!(ini->smcd_version & SMC_V2) || + !smc_ism_is_v2_capable() || + smc_find_ism_v2_device_clnt(smc, ini)) + ini->smcd_version &= ~SMC_V2; + + /* check if there is an rdma v2 device available */ + ini->check_smcrv2 = true; + ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr; + if (!(ini->smcr_version & SMC_V2) || + smc->clcsock->sk->sk_family != AF_INET || + !smc_clc_ueid_count() || + smc_find_rdma_device(smc, ini)) + ini->smcr_version &= ~SMC_V2; + ini->check_smcrv2 = false; + + ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2, + ini->smcr_version & SMC_V2); /* if neither ISM nor RDMA are supported, fallback */ - if (!smcr_indicated(ini->smc_type_v1) && - ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) + if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) rc = SMC_CLC_DECL_NOSMCDEV; return rc; @@ -752,6 +786,64 @@ static int smc_connect_clc(struct smc_sock *smc, SMC_CLC_ACCEPT, CLC_WAIT_TIME); } +void smc_fill_gid_list(struct smc_link_group *lgr, + struct smc_gidlist *gidlist, + struct smc_ib_device *known_dev, u8 *known_gid) +{ + struct smc_init_info *alt_ini = NULL; + + memset(gidlist, 0, sizeof(*gidlist)); + memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE); + + alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL); + if (!alt_ini) + goto out; + + alt_ini->vlan_id = lgr->vlan_id; + alt_ini->check_smcrv2 = true; + alt_ini->smcrv2.saddr = lgr->saddr; + smc_pnet_find_alt_roce(lgr, alt_ini, known_dev); + + if (!alt_ini->smcrv2.ib_dev_v2) + goto out; + + memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2, + SMC_GID_SIZE); + +out: + kfree(alt_ini); +} + +static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *aclc, + struct smc_init_info *ini) +{ + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)aclc; + struct smc_clc_first_contact_ext *fce = + (struct smc_clc_first_contact_ext *) + (((u8 *)clc_v2) + sizeof(*clc_v2)); + + if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) + return 0; + + if (fce->v2_direct) { + memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN); + ini->smcrv2.uses_gateway = false; + } else { + if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr, + smc_ib_gid_to_ipv4(aclc->r0.lcl.gid), + ini->smcrv2.nexthop_mac, + &ini->smcrv2.uses_gateway)) + return SMC_CLC_DECL_NOROUTE; + if (!ini->smcrv2.uses_gateway) { + /* mismatch: peer claims indirect, but its direct */ + return SMC_CLC_DECL_NOINDIRECT; + } + } + return 0; +} + /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, @@ -759,11 +851,18 @@ static int smc_connect_rdma(struct smc_sock *smc, { int i, reason_code = 0; struct smc_link *link; + u8 *eid = NULL; ini->is_smcd = false; - ini->ib_lcl = &aclc->r0.lcl; ini->ib_clcqpn = ntoh24(aclc->r0.qpn); ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; + memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN); + memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE); + memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN); + + reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini); + if (reason_code) + return reason_code; mutex_lock(&smc_client_lgr_pending); reason_code = smc_conn_create(smc, ini); @@ -785,8 +884,9 @@ static int smc_connect_rdma(struct smc_sock *smc, if (l->peer_qpn == ntoh24(aclc->r0.qpn) && !memcmp(l->peer_gid, &aclc->r0.lcl.gid, SMC_GID_SIZE) && - !memcmp(l->peer_mac, &aclc->r0.lcl.mac, - sizeof(l->peer_mac))) { + (aclc->hdr.version > SMC_V1 || + !memcmp(l->peer_mac, &aclc->r0.lcl.mac, + sizeof(l->peer_mac)))) { link = l; break; } @@ -805,7 +905,7 @@ static int smc_connect_rdma(struct smc_sock *smc, } if (ini->first_contact_local) - smc_link_save_peer_info(link, aclc); + smc_link_save_peer_info(link, aclc, ini); if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) { reason_code = SMC_CLC_DECL_ERR_RTOK; @@ -828,8 +928,18 @@ static int smc_connect_rdma(struct smc_sock *smc, } smc_rmb_sync_sg_for_device(&smc->conn); + if (aclc->hdr.version > SMC_V1) { + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)aclc; + + eid = clc_v2->r1.eid; + if (ini->first_contact_local) + smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist, + link->smcibdev, link->gid); + } + reason_code = smc_clc_send_confirm(smc, ini->first_contact_local, - SMC_V1); + aclc->hdr.version, eid, ini); if (reason_code) goto connect_abort; @@ -869,7 +979,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, int i; for (i = 0; i < ini->ism_offered_cnt + 1; i++) { - if (ini->ism_chid[i] == ntohs(aclc->chid)) { + if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) { ini->ism_selected = i; return 0; } @@ -883,6 +993,7 @@ static int smc_connect_ism(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { + u8 *eid = NULL; int rc = 0; ini->is_smcd = true; @@ -918,8 +1029,15 @@ static int smc_connect_ism(struct smc_sock *smc, smc_rx_init(smc); smc_tx_init(smc); + if (aclc->hdr.version > SMC_V1) { + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)aclc; + + eid = clc_v2->d1.eid; + } + rc = smc_clc_send_confirm(smc, ini->first_contact_local, - aclc->hdr.version); + aclc->hdr.version, eid, NULL); if (rc) goto connect_abort; mutex_unlock(&smc_server_lgr_pending); @@ -942,17 +1060,24 @@ connect_abort: static int smc_connect_check_aclc(struct smc_init_info *ini, struct smc_clc_msg_accept_confirm *aclc) { - if ((aclc->hdr.typev1 == SMC_TYPE_R && - !smcr_indicated(ini->smc_type_v1)) || - (aclc->hdr.typev1 == SMC_TYPE_D && - ((!smcd_indicated(ini->smc_type_v1) && - !smcd_indicated(ini->smc_type_v2)) || - (aclc->hdr.version == SMC_V1 && - !smcd_indicated(ini->smc_type_v1)) || - (aclc->hdr.version == SMC_V2 && - !smcd_indicated(ini->smc_type_v2))))) + if (aclc->hdr.typev1 != SMC_TYPE_R && + aclc->hdr.typev1 != SMC_TYPE_D) return SMC_CLC_DECL_MODEUNSUPP; + if (aclc->hdr.version >= SMC_V2) { + if ((aclc->hdr.typev1 == SMC_TYPE_R && + !smcr_indicated(ini->smc_type_v2)) || + (aclc->hdr.typev1 == SMC_TYPE_D && + !smcd_indicated(ini->smc_type_v2))) + return SMC_CLC_DECL_MODEUNSUPP; + } else { + if ((aclc->hdr.typev1 == SMC_TYPE_R && + !smcr_indicated(ini->smc_type_v1)) || + (aclc->hdr.typev1 == SMC_TYPE_D && + !smcd_indicated(ini->smc_type_v1))) + return SMC_CLC_DECL_MODEUNSUPP; + } + return 0; } @@ -983,14 +1108,15 @@ static int __smc_connect(struct smc_sock *smc) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, version); - ini->smcd_version = SMC_V1; - ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0; + ini->smcd_version = SMC_V1 | SMC_V2; + ini->smcr_version = SMC_V1 | SMC_V2; ini->smc_type_v1 = SMC_TYPE_B; - ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N; + ini->smc_type_v2 = SMC_TYPE_B; /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { ini->smcd_version &= ~SMC_V1; + ini->smcr_version = 0; ini->smc_type_v1 = SMC_TYPE_N; if (!ini->smcd_version) { rc = SMC_CLC_DECL_GETVLANERR; @@ -1018,15 +1144,17 @@ static int __smc_connect(struct smc_sock *smc) /* check if smc modes and versions of CLC proposal and accept match */ rc = smc_connect_check_aclc(ini, aclc); version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2; - ini->smcd_version = version; if (rc) goto vlan_cleanup; /* depending on previous steps, connect using rdma or ism */ - if (aclc->hdr.typev1 == SMC_TYPE_R) + if (aclc->hdr.typev1 == SMC_TYPE_R) { + ini->smcr_version = version; rc = smc_connect_rdma(smc, aclc, ini); - else if (aclc->hdr.typev1 == SMC_TYPE_D) + } else if (aclc->hdr.typev1 == SMC_TYPE_D) { + ini->smcd_version = version; rc = smc_connect_ism(smc, aclc, ini); + } if (rc) goto vlan_cleanup; @@ -1307,7 +1435,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); /* initial contact - try to establish second link */ - smc_llc_srv_add_link(link); + smc_llc_srv_add_link(link, NULL); return 0; } @@ -1387,33 +1515,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, ini->smc_type_v1 = pclc->hdr.typev1; ini->smc_type_v2 = pclc->hdr.typev2; - ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; - if (pclc->hdr.version > SMC_V1) - ini->smcd_version |= - ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0; - if (!(ini->smcd_version & SMC_V2)) { + ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0; + ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0; + if (pclc->hdr.version > SMC_V1) { + if (smcd_indicated(ini->smc_type_v2)) + ini->smcd_version |= SMC_V2; + if (smcr_indicated(ini->smc_type_v2)) + ini->smcr_version |= SMC_V2; + } + if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) { rc = SMC_CLC_DECL_PEERNOSMC; goto out; } - if (!smc_ism_is_v2_capable()) { - ini->smcd_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOISM2SUPP; - goto out; - } pclc_v2_ext = smc_get_clc_v2_ext(pclc); if (!pclc_v2_ext) { ini->smcd_version &= ~SMC_V2; + ini->smcr_version &= ~SMC_V2; rc = SMC_CLC_DECL_NOV2EXT; goto out; } pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); - if (!pclc_smcd_v2_ext) { - ini->smcd_version &= ~SMC_V2; - rc = SMC_CLC_DECL_NOV2DEXT; + if (ini->smcd_version & SMC_V2) { + if (!smc_ism_is_v2_capable()) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOISM2SUPP; + } else if (!pclc_smcd_v2_ext) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOV2DEXT; + } else if (!pclc_v2_ext->hdr.eid_cnt && + !pclc_v2_ext->hdr.flag.seid) { + ini->smcd_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOUEID; + } + } + if (ini->smcr_version & SMC_V2) { + if (!pclc_v2_ext->hdr.eid_cnt) { + ini->smcr_version &= ~SMC_V2; + rc = SMC_CLC_DECL_NOUEID; + } } out: - if (!ini->smcd_version) + if (!ini->smcd_version && !ini->smcr_version) return rc; return 0; @@ -1533,11 +1676,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, pclc_smcd = smc_get_clc_msg_smcd(pclc); smc_v2_ext = smc_get_clc_v2_ext(pclc); smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); - if (!smcd_v2_ext || - !smc_v2_ext->hdr.flag.seid) { /* no system EID support for SMCD */ - smc_find_ism_store_rc(SMC_CLC_DECL_NOSEID, ini); - goto not_found; - } mutex_lock(&smcd_dev_list.mutex); if (pclc_smcd->ism.chid) @@ -1555,14 +1693,16 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, } mutex_unlock(&smcd_dev_list.mutex); - if (ini->ism_dev[0]) { - smc_ism_get_system_eid(ini->ism_dev[0], &eid); - if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN)) - goto not_found; - } else { + if (!ini->ism_dev[0]) { + smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); goto not_found; } + smc_ism_get_system_eid(&eid); + if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, + smcd_v2_ext->system_eid, eid)) + goto not_found; + /* separate - outside the smcd_dev_list.lock */ smcd_version = ini->smcd_version; for (i = 0; i < matches; i++) { @@ -1579,6 +1719,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, } /* no V2 ISM device could be initialized */ ini->smcd_version = smcd_version; /* restore original value */ + ini->negotiated_eid[0] = 0; not_found: ini->smcd_version &= ~SMC_V2; @@ -1608,6 +1749,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, not_found: smc_find_ism_store_rc(rc, ini); + ini->smcd_version &= ~SMC_V1; ini->ism_dev[0] = NULL; ini->is_smcd = false; } @@ -1626,24 +1768,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) return 0; } +static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_v2_extension *smc_v2_ext; + u8 smcr_version; + int rc; + + if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2)) + goto not_found; + + smc_v2_ext = smc_get_clc_v2_ext(pclc); + if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL)) + goto not_found; + + /* prepare RDMA check */ + memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN); + memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE); + memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN); + ini->check_smcrv2 = true; + ini->smcrv2.clc_sk = new_smc->clcsock->sk; + ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr; + ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce); + rc = smc_find_rdma_device(new_smc, ini); + if (rc) { + smc_find_ism_store_rc(rc, ini); + goto not_found; + } + if (!ini->smcrv2.uses_gateway) + memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN); + + smcr_version = ini->smcr_version; + ini->smcr_version = SMC_V2; + rc = smc_listen_rdma_init(new_smc, ini); + if (!rc) + rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local); + if (!rc) + return; + ini->smcr_version = smcr_version; + smc_find_ism_store_rc(rc, ini); + +not_found: + ini->smcr_version &= ~SMC_V2; + ini->check_smcrv2 = false; +} + static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { int rc; - if (!smcr_indicated(ini->smc_type_v1)) + if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1)) return SMC_CLC_DECL_NOSMCDEV; /* prepare RDMA check */ - ini->ib_lcl = &pclc->lcl; + memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN); + memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE); + memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN); rc = smc_find_rdma_device(new_smc, ini); if (rc) { /* no RDMA device found */ - if (ini->smc_type_v1 == SMC_TYPE_B) - /* neither ISM nor RDMA device found */ - rc = SMC_CLC_DECL_NOSMCDEV; - return rc; + return SMC_CLC_DECL_NOSMCDEV; } rc = smc_listen_rdma_init(new_smc, ini); if (rc) @@ -1656,51 +1843,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { - int rc; + int prfx_rc; /* check for ISM device matching V2 proposed device */ smc_find_ism_v2_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0; - if (!(ini->smcd_version & SMC_V1)) - return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV; - - /* check for matching IP prefix and subnet length */ - rc = smc_listen_prfx_check(new_smc, pclc); - if (rc) - return ini->rc ?: rc; + /* check for matching IP prefix and subnet length (V1) */ + prfx_rc = smc_listen_prfx_check(new_smc, pclc); + if (prfx_rc) + smc_find_ism_store_rc(prfx_rc, ini); /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) return ini->rc ?: SMC_CLC_DECL_GETVLANERR; /* check for ISM device matching V1 proposed device */ - smc_find_ism_v1_device_serv(new_smc, pclc, ini); + if (!prfx_rc) + smc_find_ism_v1_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0; - if (pclc->hdr.typev1 == SMC_TYPE_D) + if (!smcr_indicated(pclc->hdr.typev1) && + !smcr_indicated(pclc->hdr.typev2)) /* skip RDMA and decline */ return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV; - /* check if RDMA is available */ - rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); - smc_find_ism_store_rc(rc, ini); + /* check if RDMA V2 is available */ + smc_find_rdma_v2_device_serv(new_smc, pclc, ini); + if (ini->smcrv2.ib_dev_v2) + return 0; - return (!rc) ? 0 : ini->rc; + /* check if RDMA V1 is available */ + if (!prfx_rc) { + int rc; + + rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); + smc_find_ism_store_rc(rc, ini); + return (!rc) ? 0 : ini->rc; + } + return SMC_CLC_DECL_NOSMCDEV; } /* listen worker: finish RDMA setup */ static int smc_listen_rdma_finish(struct smc_sock *new_smc, struct smc_clc_msg_accept_confirm *cclc, - bool local_first) + bool local_first, + struct smc_init_info *ini) { struct smc_link *link = new_smc->conn.lnk; int reason_code = 0; if (local_first) - smc_link_save_peer_info(link, cclc); + smc_link_save_peer_info(link, cclc, ini); if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) return SMC_CLC_DECL_ERR_RTOK; @@ -1721,12 +1917,13 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); - u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; struct socket *newclcsock = new_smc->clcsock; struct smc_clc_msg_accept_confirm *cclc; struct smc_clc_msg_proposal_area *buf; struct smc_clc_msg_proposal *pclc; struct smc_init_info *ini = NULL; + u8 proposal_version = SMC_V1; + u8 accept_version; int rc = 0; if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) @@ -1757,7 +1954,9 @@ static void smc_listen_work(struct work_struct *work) SMC_CLC_PROPOSAL, CLC_WAIT_TIME); if (rc) goto out_decl; - version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version; + + if (pclc->hdr.version > SMC_V1) + proposal_version = SMC_V2; /* IPSec connections opt out of SMC optimizations */ if (using_ipsec(new_smc)) { @@ -1787,8 +1986,9 @@ static void smc_listen_work(struct work_struct *work) goto out_unlock; /* send SMC Accept CLC message */ + accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version; rc = smc_clc_send_accept(new_smc, ini->first_contact_local, - ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1); + accept_version, ini->negotiated_eid); if (rc) goto out_unlock; @@ -1810,7 +2010,7 @@ static void smc_listen_work(struct work_struct *work) /* finish worker */ if (!ini->is_smcd) { rc = smc_listen_rdma_finish(new_smc, cclc, - ini->first_contact_local); + ini->first_contact_local, ini); if (rc) goto out_unlock; mutex_unlock(&smc_server_lgr_pending); @@ -1824,7 +2024,7 @@ out_unlock: mutex_unlock(&smc_server_lgr_pending); out_decl: smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0, - version); + proposal_version); out_free: kfree(ini); kfree(buf); @@ -2662,6 +2862,7 @@ static void __exit smc_exit(void) proto_unregister(&smc_proto); smc_pnet_exit(); smc_nl_exit(); + smc_clc_exit(); unregister_pernet_subsys(&smc_net_stat_ops); unregister_pernet_subsys(&smc_net_ops); rcu_barrier(); diff --git a/net/smc/smc.h b/net/smc/smc.h index d65e15f0c944..f4286ca1f228 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -29,9 +29,6 @@ * devices */ -#define SMC_MAX_HOSTNAME_LEN 32 -#define SMC_MAX_EID_LEN 32 - extern struct proto smc_proto; extern struct proto smc_proto6; @@ -59,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */ struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ - u8 type; + union { + u8 type; +#if defined(__BIG_ENDIAN_BITFIELD) + struct { + u8 llc_version:4, + llc_type:4; + }; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + struct { + u8 llc_type:4, + llc_version:4; + }; +#endif + }; } __aligned(1); struct smc_cdc_conn_state_flags { @@ -289,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc) } #endif +struct smc_gidlist; + struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); +void smc_fill_gid_list(struct smc_link_group *lgr, + struct smc_gidlist *gidlist, + struct smc_ib_device *known_dev, u8 *known_gid); #endif /* __SMC_H */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 6ec1ebe878ae..8409ab71a5e4 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -26,10 +26,12 @@ #include "smc_clc.h" #include "smc_ib.h" #include "smc_ism.h" +#include "smc_netlink.h" #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 +#define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108 #define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ @@ -39,6 +41,297 @@ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; +struct smc_clc_eid_table { + rwlock_t lock; + struct list_head list; + u8 ueid_cnt; + u8 seid_enabled; +}; + +static struct smc_clc_eid_table smc_clc_eid_table; + +struct smc_clc_eid_entry { + struct list_head list; + u8 eid[SMC_MAX_EID_LEN]; +}; + +/* The size of a user EID is 32 characters. + * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'. + * Blanks should only be used to pad to the expected size. + * First character must be alphanumeric. + */ +static bool smc_clc_ueid_valid(char *ueid) +{ + char *end = ueid + SMC_MAX_EID_LEN; + + while (--end >= ueid && isspace(*end)) + ; + if (end < ueid) + return false; + if (!isalnum(*ueid) || islower(*ueid)) + return false; + while (ueid <= end) { + if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' && + *ueid != '-') + return false; + ueid++; + } + return true; +} + +static int smc_clc_ueid_add(char *ueid) +{ + struct smc_clc_eid_entry *new_ueid, *tmp_ueid; + int rc; + + if (!smc_clc_ueid_valid(ueid)) + return -EINVAL; + + /* add a new ueid entry to the ueid table if there isn't one */ + new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL); + if (!new_ueid) + return -ENOMEM; + memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN); + + write_lock(&smc_clc_eid_table.lock); + if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) { + rc = -ERANGE; + goto err_out; + } + list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { + if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) { + rc = -EEXIST; + goto err_out; + } + } + list_add_tail(&new_ueid->list, &smc_clc_eid_table.list); + smc_clc_eid_table.ueid_cnt++; + write_unlock(&smc_clc_eid_table.lock); + return 0; + +err_out: + write_unlock(&smc_clc_eid_table.lock); + kfree(new_ueid); + return rc; +} + +int smc_clc_ueid_count(void) +{ + int count; + + read_lock(&smc_clc_eid_table.lock); + count = smc_clc_eid_table.ueid_cnt; + read_unlock(&smc_clc_eid_table.lock); + + return count; +} + +int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; + char *ueid; + + if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) + return -EINVAL; + ueid = (char *)nla_data(nla_ueid); + + return smc_clc_ueid_add(ueid); +} + +/* remove one or all ueid entries from the table */ +static int smc_clc_ueid_remove(char *ueid) +{ + struct smc_clc_eid_entry *lst_ueid, *tmp_ueid; + int rc = -ENOENT; + + /* remove table entry */ + write_lock(&smc_clc_eid_table.lock); + list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list, + list) { + if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) { + list_del(&lst_ueid->list); + smc_clc_eid_table.ueid_cnt--; + kfree(lst_ueid); + rc = 0; + } + } + if (!rc && !smc_clc_eid_table.ueid_cnt) { + smc_clc_eid_table.seid_enabled = 1; + rc = -EAGAIN; /* indicate success and enabling of seid */ + } + write_unlock(&smc_clc_eid_table.lock); + return rc; +} + +int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; + char *ueid; + + if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) + return -EINVAL; + ueid = (char *)nla_data(nla_ueid); + + return smc_clc_ueid_remove(ueid); +} + +int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info) +{ + smc_clc_ueid_remove(NULL); + return 0; +} + +static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, + u32 flags, char *ueid) +{ + char ueid_str[SMC_MAX_EID_LEN + 1]; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family, + flags, SMC_NETLINK_DUMP_UEID); + if (!hdr) + return -ENOMEM; + snprintf(ueid_str, sizeof(ueid_str), "%s", ueid); + if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; + } + genlmsg_end(skb, hdr); + return 0; +} + +static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq, + int start_idx) +{ + struct smc_clc_eid_entry *lst_ueid; + int idx = 0; + + read_lock(&smc_clc_eid_table.lock); + list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) { + if (idx++ < start_idx) + continue; + if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI, + lst_ueid->eid)) { + --idx; + break; + } + } + read_unlock(&smc_clc_eid_table.lock); + return idx; +} + +int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + int idx; + + idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, cb_ctx->pos[0]); + + cb_ctx->pos[0] = idx; + return skb->len; +} + +int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); + char seid_str[SMC_MAX_EID_LEN + 1]; + u8 seid_enabled; + void *hdr; + u8 *seid; + + if (cb_ctx->pos[0]) + return skb->len; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &smc_gen_nl_family, NLM_F_MULTI, + SMC_NETLINK_DUMP_SEID); + if (!hdr) + return -ENOMEM; + if (!smc_ism_is_v2_capable()) + goto end; + + smc_ism_get_system_eid(&seid); + snprintf(seid_str, sizeof(seid_str), "%s", seid); + if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) + goto err; + read_lock(&smc_clc_eid_table.lock); + seid_enabled = smc_clc_eid_table.seid_enabled; + read_unlock(&smc_clc_eid_table.lock); + if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled)) + goto err; +end: + genlmsg_end(skb, hdr); + cb_ctx->pos[0]++; + return skb->len; +err: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) +{ + write_lock(&smc_clc_eid_table.lock); + smc_clc_eid_table.seid_enabled = 1; + write_unlock(&smc_clc_eid_table.lock); + return 0; +} + +int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) +{ + int rc = 0; + + write_lock(&smc_clc_eid_table.lock); + if (!smc_clc_eid_table.ueid_cnt) + rc = -ENOENT; + else + smc_clc_eid_table.seid_enabled = 0; + write_unlock(&smc_clc_eid_table.lock); + return rc; +} + +static bool _smc_clc_match_ueid(u8 *peer_ueid) +{ + struct smc_clc_eid_entry *tmp_ueid; + + list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { + if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN)) + return true; + } + return false; +} + +bool smc_clc_match_eid(u8 *negotiated_eid, + struct smc_clc_v2_extension *smc_v2_ext, + u8 *peer_eid, u8 *local_eid) +{ + bool match = false; + int i; + + negotiated_eid[0] = 0; + read_lock(&smc_clc_eid_table.lock); + if (peer_eid && local_eid && + smc_clc_eid_table.seid_enabled && + smc_v2_ext->hdr.flag.seid && + !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) { + memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN); + match = true; + goto out; + } + + for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) { + if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) { + memcpy(negotiated_eid, smc_v2_ext->user_eids[i], + SMC_MAX_EID_LEN); + match = true; + goto out; + } + } +out: + read_unlock(&smc_clc_eid_table.lock); + return match; +} + /* check arriving CLC proposal */ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) { @@ -100,6 +393,27 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + sizeof(struct smc_clc_first_contact_ext))) return false; + if (hdr->typev1 == SMC_TYPE_R && + ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) + return false; + } + return true; +} + +/* check arriving CLC decline */ +static bool +smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) +{ + struct smc_clc_msg_hdr *hdr = &dclc->hdr; + + if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) + return false; + if (hdr->version == SMC_V1) { + if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline)) + return false; + } else { + if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2)) + return false; } return true; } @@ -145,9 +459,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; - if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + if (!smc_clc_msg_decl_valid(dclc)) return false; - trl = &dclc->trl; + check_trl = false; break; default: return false; @@ -446,15 +760,16 @@ out: /* send CLC DECLINE message across internal TCP socket */ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) { - struct smc_clc_msg_decline dclc; + struct smc_clc_msg_decline *dclc_v1; + struct smc_clc_msg_decline_v2 dclc; struct msghdr msg; + int len, send_len; struct kvec vec; - int len; + dclc_v1 = (struct smc_clc_msg_decline *)&dclc; memset(&dclc, 0, sizeof(dclc)); memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); dclc.hdr.type = SMC_CLC_DECLINE; - dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = version; dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? @@ -464,14 +779,22 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); - memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + if (version == SMC_V1) { + memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + send_len = sizeof(*dclc_v1); + } else { + memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + send_len = sizeof(dclc); + } + dclc.hdr.length = htons(send_len); memset(&msg, 0, sizeof(msg)); vec.iov_base = &dclc; - vec.iov_len = sizeof(struct smc_clc_msg_decline); - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, - sizeof(struct smc_clc_msg_decline)); - if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) + vec.iov_len = send_len; + len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len); + if (len < 0 || len < send_len) len = -EPROTO; return len > 0 ? 0 : len; } @@ -551,9 +874,10 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) if (ini->smc_type_v2 == SMC_TYPE_N) { pclc_smcd->v2_ext_offset = 0; } else { + struct smc_clc_eid_entry *ueident; u16 v2_ext_offset; - u8 *eid = NULL; + v2_ext->hdr.flag.release = SMC_RELEASE; v2_ext_offset = sizeof(*pclc_smcd) - offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); if (ini->smc_type_v1 != SMC_TYPE_N) @@ -561,21 +885,31 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0]); pclc_smcd->v2_ext_offset = htons(v2_ext_offset); - v2_ext->hdr.eid_cnt = 0; + plen += sizeof(*v2_ext); + + read_lock(&smc_clc_eid_table.lock); + v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; + plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; + i = 0; + list_for_each_entry(ueident, &smc_clc_eid_table.list, list) { + memcpy(v2_ext->user_eids[i++], ueident->eid, + sizeof(ueident->eid)); + } + read_unlock(&smc_clc_eid_table.lock); + } + if (smcd_indicated(ini->smc_type_v2)) { + u8 *eid = NULL; + + v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; - v2_ext->hdr.flag.release = SMC_RELEASE; - v2_ext->hdr.flag.seid = 1; v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); - if (ini->ism_dev[0]) - smc_ism_get_system_eid(ini->ism_dev[0], &eid); - else - smc_ism_get_system_eid(ini->ism_dev[1], &eid); - if (eid) + smc_ism_get_system_eid(&eid); + if (eid && v2_ext->hdr.flag.seid) memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); - plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); + plen += sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { for (i = 1; i <= ini->ism_offered_cnt; i++) { gidchids[i - 1].gid = @@ -587,6 +921,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) sizeof(struct smc_clc_smcd_gid_chid); } } + if (smcr_indicated(ini->smc_type_v2)) + memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); + pclc_base->hdr.length = htons(plen); memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -608,13 +945,16 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) } if (ini->smc_type_v2 != SMC_TYPE_N) { vec[i].iov_base = v2_ext; - vec[i++].iov_len = sizeof(*v2_ext); - vec[i].iov_base = smcd_v2_ext; - vec[i++].iov_len = sizeof(*smcd_v2_ext); - if (ini->ism_offered_cnt) { - vec[i].iov_base = gidchids; - vec[i++].iov_len = ini->ism_offered_cnt * + vec[i++].iov_len = sizeof(*v2_ext) + + (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); + if (smcd_indicated(ini->smc_type_v2)) { + vec[i].iov_base = smcd_v2_ext; + vec[i++].iov_len = sizeof(*smcd_v2_ext); + if (ini->ism_offered_cnt) { + vec[i].iov_base = gidchids; + vec[i++].iov_len = ini->ism_offered_cnt * sizeof(struct smc_clc_smcd_gid_chid); + } } } vec[i].iov_base = trl; @@ -636,13 +976,15 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) /* build and send CLC CONFIRM / ACCEPT message */ static int smc_clc_send_confirm_accept(struct smc_sock *smc, struct smc_clc_msg_accept_confirm_v2 *clc_v2, - int first_contact, u8 version) + int first_contact, u8 version, + u8 *eid, struct smc_init_info *ini) { struct smc_connection *conn = &smc->conn; struct smc_clc_msg_accept_confirm *clc; struct smc_clc_first_contact_ext fce; + struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; - struct kvec vec[3]; + struct kvec vec[5]; struct msghdr msg; int i, len; @@ -664,12 +1006,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { - u8 *eid = NULL; - - clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); - smc_ism_get_system_eid(conn->lgr->smcd, &eid); - if (eid) - memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); + clc_v2->d1.chid = + htons(smc_ism_get_chid(conn->lgr->smcd)); + if (eid && eid[0]) + memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; if (first_contact) smc_clc_fill_fce(&fce, &len); @@ -708,6 +1048,26 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(clc->r0.psn, link->psn_initial); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + } else { + if (eid && eid[0]) + memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN); + len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + smc_clc_fill_fce(&fce, &len); + fce.v2_direct = !link->lgr->uses_gateway; + memset(&gle, 0, sizeof(gle)); + if (ini && clc->hdr.type == SMC_CLC_CONFIRM) { + gle.gid_cnt = ini->smcrv2.gidlist.len; + len += sizeof(gle); + len += gle.gid_cnt * sizeof(gle.gid[0]); + } else { + len += sizeof(gle.reserved); + } + } + clc_v2->hdr.length = htons(len); + } memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); } @@ -715,7 +1075,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, i = 0; vec[i].iov_base = clc_v2; if (version > SMC_V1) - vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); + vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? + SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 : + SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) - + sizeof(trl); else vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? SMCD_CLC_ACCEPT_CONFIRM_LEN : @@ -724,6 +1087,18 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, if (version > SMC_V1 && first_contact) { vec[i].iov_base = &fce; vec[i++].iov_len = sizeof(fce); + if (!conn->lgr->is_smcd) { + if (clc->hdr.type == SMC_CLC_CONFIRM) { + vec[i].iov_base = &gle; + vec[i++].iov_len = sizeof(gle); + vec[i].iov_base = &ini->smcrv2.gidlist.list; + vec[i++].iov_len = gle.gid_cnt * + sizeof(gle.gid[0]); + } else { + vec[i].iov_base = &gle.reserved; + vec[i++].iov_len = sizeof(gle.reserved); + } + } } vec[i].iov_base = &trl; vec[i++].iov_len = sizeof(trl); @@ -733,7 +1108,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, /* send CLC CONFIRM message across internal TCP socket */ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, - u8 version) + u8 version, u8 *eid, struct smc_init_info *ini) { struct smc_clc_msg_accept_confirm_v2 cclc_v2; int reason_code = 0; @@ -743,7 +1118,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, memset(&cclc_v2, 0, sizeof(cclc_v2)); cclc_v2.hdr.type = SMC_CLC_CONFIRM; len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, - version); + version, eid, ini); if (len < ntohs(cclc_v2.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; @@ -758,7 +1133,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, /* send CLC ACCEPT message across internal TCP socket */ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, - u8 version) + u8 version, u8 *negotiated_eid) { struct smc_clc_msg_accept_confirm_v2 aclc_v2; int len; @@ -766,7 +1141,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, memset(&aclc_v2, 0, sizeof(aclc_v2)); aclc_v2.hdr.type = SMC_CLC_ACCEPT; len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, - version); + version, negotiated_eid, NULL); if (len < ntohs(aclc_v2.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; @@ -786,4 +1161,14 @@ void __init smc_clc_init(void) u = utsname(); memcpy(smc_hostname, u->nodename, min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); + + INIT_LIST_HEAD(&smc_clc_eid_table.list); + rwlock_init(&smc_clc_eid_table.lock); + smc_clc_eid_table.ueid_cnt = 0; + smc_clc_eid_table.seid_enabled = 1; +} + +void smc_clc_exit(void) +{ + smc_clc_ueid_remove(NULL); } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 32d37f7b70f2..83f02f131fc0 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -14,8 +14,10 @@ #define _SMC_CLC_H #include <rdma/ib_verbs.h> +#include <linux/smc.h> #include "smc.h" +#include "smc_netlink.h" #define SMC_CLC_PROPOSAL 0x01 #define SMC_CLC_ACCEPT 0x02 @@ -42,6 +44,7 @@ #define SMC_CLC_DECL_NOV2DEXT 0x03030005 /* peer sent no clc SMC-Dv2 ext. */ #define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */ #define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */ +#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ @@ -52,6 +55,8 @@ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ #define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ #define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */ +#define SMC_CLC_DECL_NOROUTE 0x030e0000 /* SMC-Rv2 conn. no route to peer */ +#define SMC_CLC_DECL_NOINDIRECT 0x030f0000 /* SMC-Rv2 conn. indirect mismatch*/ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ @@ -158,6 +163,7 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ } __aligned(4); #define SMC_CLC_MAX_V6_PREFIX 8 +#define SMC_CLC_MAX_UEID 8 struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal pclc_base; @@ -165,6 +171,7 @@ struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_v2_extension pclc_v2_ext; + u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN]; struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; struct smc_clc_msg_trail pclc_trl; @@ -209,11 +216,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ #define SMC_CLC_OS_AIX 3 struct smc_clc_first_contact_ext { - u8 reserved1; #if defined(__BIG_ENDIAN_BITFIELD) + u8 v2_direct : 1, + reserved : 7; u8 os_type : 4, release : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 7, + v2_direct : 1; u8 release : 4, os_type : 4; #endif @@ -221,6 +231,13 @@ struct smc_clc_first_contact_ext { u8 hostname[SMC_MAX_HOSTNAME_LEN]; }; +struct smc_clc_fce_gid_ext { + u8 reserved[16]; + u8 gid_cnt; + u8 reserved2[3]; + u8 gid[][SMC_GID_SIZE]; +}; + struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { @@ -235,13 +252,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { - struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ + struct { /* SMC-R */ + struct smcr_clc_msg_accept_confirm r0; + u8 eid[SMC_MAX_EID_LEN]; + u8 reserved6[8]; + } r1; struct { /* SMC-D */ struct smcd_clc_msg_accept_confirm_common d0; __be16 chid; u8 eid[SMC_MAX_EID_LEN]; u8 reserved5[8]; - }; + } d1; }; }; @@ -260,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ } __aligned(4); +#define SMC_DECL_DIAG_COUNT_V2 4 /* no. of additional peer diagnosis codes */ + +struct smc_clc_msg_decline_v2 { /* clc decline message */ + struct smc_clc_msg_hdr hdr; + u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */ + __be32 peer_diagnosis; /* diagnosis information */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 os_type : 4, + reserved : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 4, + os_type : 4; +#endif + u8 reserved2[3]; + __be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2]; + struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ +} __aligned(4); + /* determine start of the prefix area within the proposal message */ static inline struct smc_clc_msg_proposal_prefix * smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) @@ -278,6 +317,17 @@ static inline bool smcd_indicated(int smc_type) return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B; } +static inline u8 smc_indicated_type(int is_smcd, int is_smcr) +{ + if (is_smcd && is_smcr) + return SMC_TYPE_B; + if (is_smcd) + return SMC_TYPE_D; + if (is_smcr) + return SMC_TYPE_R; + return SMC_TYPE_N; +} + /* get SMC-D info from proposal message */ static inline struct smc_clc_msg_smcd * smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) @@ -330,10 +380,22 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version); int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, - u8 version); + u8 version, u8 *eid, struct smc_init_info *ini); int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, - u8 version); + u8 version, u8 *negotiated_eid); void smc_clc_init(void) __init; +void smc_clc_exit(void); void smc_clc_get_hostname(u8 **host); +bool smc_clc_match_eid(u8 *negotiated_eid, + struct smc_clc_v2_extension *smc_v2_ext, + u8 *peer_eid, u8 *local_eid); +int smc_clc_ueid_count(void); +int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb); +int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info); +int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info); +int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info); +int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb); +int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info); +int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d2206743dc71..8e642f8f334f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -223,7 +223,6 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); char hostname[SMC_MAX_HOSTNAME_LEN + 1]; char smc_seid[SMC_MAX_EID_LEN + 1]; - struct smcd_dev *smcd_dev; struct nlattr *attrs; u8 *seid = NULL; u8 *host = NULL; @@ -245,6 +244,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) goto errattr; if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable())) goto errattr; + if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true)) + goto errattr; smc_clc_get_hostname(&host); if (host) { memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); @@ -252,13 +253,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname)) goto errattr; } - mutex_lock(&smcd_dev_list.mutex); - smcd_dev = list_first_entry_or_null(&smcd_dev_list.list, - struct smcd_dev, list); - if (smcd_dev) - smc_ism_get_system_eid(smcd_dev, &seid); - mutex_unlock(&smcd_dev_list.mutex); - if (seid && smc_ism_is_v2_capable()) { + if (smc_ism_is_v2_capable()) { + smc_ism_get_system_eid(&seid); memcpy(smc_seid, seid, SMC_MAX_EID_LEN); smc_seid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid)) @@ -277,12 +273,65 @@ errmsg: return skb->len; } +/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */ +static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb, + struct nlattr *v2_attrs) +{ + char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; + char smc_eid[SMC_MAX_EID_LEN + 1]; + + if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) + goto errv2attr; + memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); + smc_host[SMC_MAX_HOSTNAME_LEN] = 0; + if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) + goto errv2attr; + memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); + smc_eid[SMC_MAX_EID_LEN] = 0; + if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) + goto errv2attr; + + nla_nest_end(skb, v2_attrs); + return 0; + +errv2attr: + nla_nest_cancel(skb, v2_attrs); + return -EMSGSIZE; +} + +static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *v2_attrs; + + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2); + if (!v2_attrs) + goto errattr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway)) + goto errv2attr; + + nla_nest_end(skb, v2_attrs); + return 0; + +errv2attr: + nla_nest_cancel(skb, v2_attrs); +errattr: + return -EMSGSIZE; +} + static int smc_nl_fill_lgr(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { char smc_target[SMC_MAX_PNETID_LEN + 1]; - struct nlattr *attrs; + struct nlattr *attrs, *v2_attrs; attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR); if (!attrs) @@ -302,6 +351,15 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) goto errattr; + if (lgr->smc_version > SMC_V1) { + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON); + if (!v2_attrs) + goto errattr; + if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) + goto errattr; + if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb)) + goto errattr; + } nla_nest_end(skb, attrs); return 0; @@ -434,10 +492,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; char smc_pnet[SMC_MAX_PNETID_LEN + 1]; - char smc_eid[SMC_MAX_EID_LEN + 1]; - struct nlattr *v2_attrs; struct nlattr *attrs; void *nlh; @@ -469,32 +524,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) goto errattr; + if (lgr->smc_version > SMC_V1) { + struct nlattr *v2_attrs; - v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2); - if (!v2_attrs) - goto errattr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version)) - goto errv2attr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release)) - goto errv2attr; - if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) - goto errv2attr; - memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); - smc_host[SMC_MAX_HOSTNAME_LEN] = 0; - if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) - goto errv2attr; - memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); - smc_eid[SMC_MAX_EID_LEN] = 0; - if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) - goto errv2attr; - - nla_nest_end(skb, v2_attrs); + v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON); + if (!v2_attrs) + goto errattr; + if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) + goto errattr; + } nla_nest_end(skb, attrs); genlmsg_end(skb, nlh); return 0; -errv2attr: - nla_nest_cancel(skb, v2_attrs); errattr: nla_nest_cancel(skb, attrs); errout: @@ -690,24 +732,30 @@ static void smcr_copy_dev_info_to_link(struct smc_link *link) int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini) { + struct smc_ib_device *smcibdev; u8 rndvec[3]; int rc; - get_device(&ini->ib_dev->ibdev->dev); - atomic_inc(&ini->ib_dev->lnk_cnt); + if (lgr->smc_version == SMC_V2) { + lnk->smcibdev = ini->smcrv2.ib_dev_v2; + lnk->ibport = ini->smcrv2.ib_port_v2; + } else { + lnk->smcibdev = ini->ib_dev; + lnk->ibport = ini->ib_port; + } + get_device(&lnk->smcibdev->ibdev->dev); + atomic_inc(&lnk->smcibdev->lnk_cnt); + lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; lnk->link_idx = link_idx; - lnk->smcibdev = ini->ib_dev; - lnk->ibport = ini->ib_port; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); - lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; atomic_set(&lnk->conn_cnt, 0); smc_llc_link_set_uid(lnk); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); - if (!ini->ib_dev->initialized) { - rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); + if (!lnk->smcibdev->initialized) { + rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev); if (rc) goto out; } @@ -715,7 +763,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, - ini->vlan_id, lnk->gid, &lnk->sgid_index); + ini->vlan_id, lnk->gid, &lnk->sgid_index, + lgr->smc_version == SMC_V2 ? + &ini->smcrv2 : NULL); if (rc) goto out; rc = smc_llc_link_init(lnk); @@ -746,11 +796,12 @@ clear_llc_lnk: smc_llc_link_clear(lnk, false); out: smc_ibdev_cnt_dec(lnk); - put_device(&ini->ib_dev->ibdev->dev); + put_device(&lnk->smcibdev->ibdev->dev); + smcibdev = lnk->smcibdev; memset(lnk, 0, sizeof(struct smc_link)); lnk->state = SMC_LNK_UNUSED; - if (!atomic_dec_return(&ini->ib_dev->lnk_cnt)) - wake_up(&ini->ib_dev->lnks_deleted); + if (!atomic_dec_return(&smcibdev->lnk_cnt)) + wake_up(&smcibdev->lnks_deleted); return rc; } @@ -814,18 +865,37 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt); } else { /* SMC-R specific settings */ + struct smc_ib_device *ibdev; + int ibport; + lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; - memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, + lgr->smc_version = ini->smcr_version; + memcpy(lgr->peer_systemid, ini->peer_systemid, SMC_SYSTEMID_LEN); - memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1], + if (lgr->smc_version == SMC_V2) { + ibdev = ini->smcrv2.ib_dev_v2; + ibport = ini->smcrv2.ib_port_v2; + lgr->saddr = ini->smcrv2.saddr; + lgr->uses_gateway = ini->smcrv2.uses_gateway; + memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac, + ETH_ALEN); + } else { + ibdev = ini->ib_dev; + ibport = ini->ib_port; + } + memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], SMC_MAX_PNETID_LEN); + if (smc_wr_alloc_lgr_mem(lgr)) + goto free_wq; smc_llc_lgr_init(lgr, smc); link_idx = SMC_SINGLE_LINK; lnk = &lgr->lnk[link_idx]; rc = smcr_link_init(lgr, lnk, link_idx, ini); - if (rc) + if (rc) { + smc_wr_free_lgr_mem(lgr); goto free_wq; + } lgr_list = &smc_lgr_list.list; lgr_lock = &smc_lgr_list.lock; atomic_inc(&lgr_cnt); @@ -1232,6 +1302,7 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) wake_up(&lgr->smcd->lgrs_deleted); } else { + smc_wr_free_lgr_mem(lgr); if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } @@ -1642,13 +1713,15 @@ out: return rc; } -static bool smcr_lgr_match(struct smc_link_group *lgr, - struct smc_clc_msg_local *lcl, +static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, + u8 peer_systemid[], + u8 peer_gid[], + u8 peer_mac_v1[], enum smc_lgr_role role, u32 clcqpn) { int i; - if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) || + if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) || lgr->role != role) return false; @@ -1656,8 +1729,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, if (!smc_link_active(&lgr->lnk[i])) continue; if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && - !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && - !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac))) + !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) && + (smcr_version == SMC_V2 || + !memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN))) return true; } return false; @@ -1696,7 +1770,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) if ((ini->is_smcd ? smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], ini->ism_peer_gid[ini->ism_selected]) : - smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && + smcr_lgr_match(lgr, ini->smcr_version, + ini->peer_systemid, + ini->peer_gid, ini->peer_mac, role, + ini->ib_clcqpn)) && !lgr->sync_err && (ini->smcd_version == SMC_V2 || lgr->vlan_id == ini->vlan_id) && diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c043ecdca5c4..59cef3b830d8 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -42,11 +42,16 @@ enum smc_link_state { /* possible states of a link */ }; #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ +#define SMC_WR_BUF_V2_SIZE 8192 /* size of v2 work request buffer */ struct smc_wr_buf { u8 raw[SMC_WR_BUF_SIZE]; }; +struct smc_wr_v2_buf { + u8 raw[SMC_WR_BUF_V2_SIZE]; +}; + #define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ enum smc_wr_reg_state { @@ -92,7 +97,11 @@ struct smc_link { struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ struct completion *wr_tx_compl; /* WR send CQE completion */ /* above four vectors have wr_tx_cnt elements and use the same index */ + struct ib_send_wr *wr_tx_v2_ib; /* WR send v2 meta data */ + struct ib_sge *wr_tx_v2_sge; /* WR send v2 gather meta data*/ + struct smc_wr_tx_pend *wr_tx_v2_pend; /* WR send v2 waiting for CQE */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ + dma_addr_t wr_tx_v2_dma_addr; /* DMA address of v2 tx buf*/ atomic_long_t wr_tx_id; /* seq # of last sent WR */ unsigned long *wr_tx_mask; /* bit mask of used indexes */ u32 wr_tx_cnt; /* number of WR send buffers */ @@ -104,6 +113,7 @@ struct smc_link { struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */ /* above three vectors have wr_rx_cnt elements and use the same index */ dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ + dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/ u64 wr_rx_id; /* seq # of last recv WR */ u32 wr_rx_cnt; /* number of WR recv buffers */ unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ @@ -208,6 +218,7 @@ enum smc_llc_flowtype { SMC_LLC_FLOW_NONE = 0, SMC_LLC_FLOW_ADD_LINK = 2, SMC_LLC_FLOW_DEL_LINK = 4, + SMC_LLC_FLOW_REQ_ADD_LINK = 5, SMC_LLC_FLOW_RKEY = 6, }; @@ -250,6 +261,10 @@ struct smc_link_group { /* client or server */ struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ + struct smc_wr_v2_buf *wr_rx_buf_v2; + /* WR v2 recv payload buffer */ + struct smc_wr_v2_buf *wr_tx_buf_v2; + /* WR v2 send payload buffer */ char peer_systemid[SMC_SYSTEMID_LEN]; /* unique system_id of peer */ struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] @@ -288,6 +303,9 @@ struct smc_link_group { /* link keep alive time */ u32 llc_termination_rsn; /* rsn code for termination */ + u8 nexthop_mac[ETH_ALEN]; + u8 uses_gateway; + __be32 saddr; }; struct { /* SMC-D */ u64 peer_gid; @@ -302,6 +320,31 @@ struct smc_link_group { struct smc_clc_msg_local; +#define GID_LIST_SIZE 2 + +struct smc_gidlist { + u8 len; + u8 list[GID_LIST_SIZE][SMC_GID_SIZE]; +}; + +struct smc_init_info_smcrv2 { + /* Input fields */ + __be32 saddr; + struct sock *clc_sk; + __be32 daddr; + + /* Output fields when saddr is set */ + struct smc_ib_device *ib_dev_v2; + u8 ib_port_v2; + u8 ib_gid_v2[SMC_GID_SIZE]; + + /* Additional output fields when clc_sk and daddr is set as well */ + u8 uses_gateway; + u8 nexthop_mac[ETH_ALEN]; + + struct smc_gidlist gidlist; +}; + struct smc_init_info { u8 is_smcd; u8 smc_type_v1; @@ -310,12 +353,18 @@ struct smc_init_info { u8 first_contact_local; unsigned short vlan_id; u32 rc; + u8 negotiated_eid[SMC_MAX_EID_LEN]; /* SMC-R */ - struct smc_clc_msg_local *ib_lcl; + u8 smcr_version; + u8 check_smcrv2; + u8 peer_gid[SMC_GID_SIZE]; + u8 peer_mac[ETH_ALEN]; + u8 peer_systemid[SMC_SYSTEMID_LEN]; struct smc_ib_device *ib_dev; u8 ib_gid[SMC_GID_SIZE]; u8 ib_port; u32 ib_clcqpn; + struct smc_init_info_smcrv2 smcrv2; /* SMC-D */ u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index a8845343d183..d93055ec17ae 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -17,6 +17,7 @@ #include <linux/scatterlist.h> #include <linux/wait.h> #include <linux/mutex.h> +#include <linux/inetdevice.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -62,16 +63,23 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk) IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; struct ib_qp_attr qp_attr; + u8 hop_lim = 1; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_RTR; qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); - rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0); + if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway) + hop_lim = IPV6_DEFAULT_HOPLIMIT; + rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, hop_lim, 0); rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); - memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, - sizeof(lnk->peer_mac)); + if (lnk->lgr->smc_version == SMC_V2 && lnk->lgr->uses_gateway) + memcpy(&qp_attr.ah_attr.roce.dmac, lnk->lgr->nexthop_mac, + sizeof(lnk->lgr->nexthop_mac)); + else + memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, + sizeof(lnk->peer_mac)); qp_attr.dest_qp_num = lnk->peer_qpn; qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */ qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming @@ -183,9 +191,81 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } +int smc_ib_find_route(__be32 saddr, __be32 daddr, + u8 nexthop_mac[], u8 *uses_gateway) +{ + struct neighbour *neigh = NULL; + struct rtable *rt = NULL; + struct flowi4 fl4 = { + .saddr = saddr, + .daddr = daddr + }; + + if (daddr == cpu_to_be32(INADDR_NONE)) + goto out; + rt = ip_route_output_flow(&init_net, &fl4, NULL); + if (IS_ERR(rt)) + goto out; + if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) + goto out; + neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr); + if (neigh) { + memcpy(nexthop_mac, neigh->ha, ETH_ALEN); + *uses_gateway = rt->rt_uses_gateway; + return 0; + } +out: + return -ENOENT; +} + +static int smc_ib_determine_gid_rcu(const struct net_device *ndev, + const struct ib_gid_attr *attr, + u8 gid[], u8 *sgid_index, + struct smc_init_info_smcrv2 *smcrv2) +{ + if (!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) { + if (gid) + memcpy(gid, &attr->gid, SMC_GID_SIZE); + if (sgid_index) + *sgid_index = attr->index; + return 0; + } + if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && + smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) { + struct in_device *in_dev = __in_dev_get_rcu(ndev); + const struct in_ifaddr *ifa; + bool subnet_match = false; + + if (!in_dev) + goto out; + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (!inet_ifa_match(smcrv2->saddr, ifa)) + continue; + subnet_match = true; + break; + } + if (!subnet_match) + goto out; + if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr, + smcrv2->daddr, + smcrv2->nexthop_mac, + &smcrv2->uses_gateway)) + goto out; + + if (gid) + memcpy(gid, &attr->gid, SMC_GID_SIZE); + if (sgid_index) + *sgid_index = attr->index; + return 0; + } +out: + return -ENODEV; +} + /* determine the gid for an ib-device port and vlan id */ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, - unsigned short vlan_id, u8 gid[], u8 *sgid_index) + unsigned short vlan_id, u8 gid[], u8 *sgid_index, + struct smc_init_info_smcrv2 *smcrv2) { const struct ib_gid_attr *attr; const struct net_device *ndev; @@ -201,15 +281,13 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, if (!IS_ERR(ndev) && ((!vlan_id && !is_vlan_dev(ndev)) || (vlan_id && is_vlan_dev(ndev) && - vlan_dev_vlan_id(ndev) == vlan_id)) && - attr->gid_type == IB_GID_TYPE_ROCE) { - rcu_read_unlock(); - if (gid) - memcpy(gid, &attr->gid, SMC_GID_SIZE); - if (sgid_index) - *sgid_index = attr->index; - rdma_put_gid_attr(attr); - return 0; + vlan_dev_vlan_id(ndev) == vlan_id))) { + if (!smc_ib_determine_gid_rcu(ndev, attr, gid, + sgid_index, smcrv2)) { + rcu_read_unlock(); + rdma_put_gid_attr(attr); + return 0; + } } rcu_read_unlock(); rdma_put_gid_attr(attr); @@ -217,6 +295,58 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, return -ENODEV; } +/* check if gid is still defined on smcibdev */ +static bool smc_ib_check_link_gid(u8 gid[SMC_GID_SIZE], bool smcrv2, + struct smc_ib_device *smcibdev, u8 ibport) +{ + const struct ib_gid_attr *attr; + bool rc = false; + int i; + + for (i = 0; !rc && i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { + attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); + if (IS_ERR(attr)) + continue; + + rcu_read_lock(); + if ((!smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE) || + (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && + !(ipv6_addr_type((const struct in6_addr *)&attr->gid) + & IPV6_ADDR_LINKLOCAL))) + if (!memcmp(gid, &attr->gid, SMC_GID_SIZE)) + rc = true; + rcu_read_unlock(); + rdma_put_gid_attr(attr); + } + return rc; +} + +/* check all links if the gid is still defined on smcibdev */ +static void smc_ib_gid_check(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link_group *lgr; + int i; + + spin_lock_bh(&smc_lgr_list.lock); + list_for_each_entry(lgr, &smc_lgr_list.list, list) { + if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, + SMC_MAX_PNETID_LEN)) + continue; /* lgr is not affected */ + if (list_empty(&lgr->list)) + continue; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state == SMC_LNK_UNUSED || + lgr->lnk[i].smcibdev != smcibdev) + continue; + if (!smc_ib_check_link_gid(lgr->lnk[i].gid, + lgr->smc_version == SMC_V2, + smcibdev, ibport)) + smcr_port_err(smcibdev, ibport); + } + } + spin_unlock_bh(&smc_lgr_list.lock); +} + static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) { int rc; @@ -255,6 +385,7 @@ static void smc_ib_port_event_work(struct work_struct *work) } else { clear_bit(port_idx, smcibdev->ports_going_away); smcr_port_add(smcibdev, port_idx + 1); + smc_ib_gid_check(smcibdev, port_idx + 1); } } } @@ -523,6 +654,7 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk) /* create a queue pair within the protection domain for a link */ int smc_ib_create_queue_pair(struct smc_link *lnk) { + int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; struct ib_qp_init_attr qp_attr = { .event_handler = smc_ib_qp_event_handler, .qp_context = lnk, @@ -536,7 +668,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .max_send_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, - .max_recv_sge = 1, + .max_recv_sge = sges_per_buf, }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 3085f5180da7..07585937370e 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -59,6 +59,17 @@ struct smc_ib_device { /* ib-device infos for smc */ int ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */ }; +static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE]) +{ + struct in6_addr *addr6 = (struct in6_addr *)gid; + + if (ipv6_addr_v4mapped(addr6) || + !(addr6->s6_addr32[0] | addr6->s6_addr32[1] | addr6->s6_addr32[2])) + return addr6->s6_addr32[3]; + return cpu_to_be32(INADDR_NONE); +} + +struct smc_init_info_smcrv2; struct smc_buf_desc; struct smc_link; @@ -90,7 +101,10 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, - unsigned short vlan_id, u8 gid[], u8 *sgid_index); + unsigned short vlan_id, u8 gid[], u8 *sgid_index, + struct smc_init_info_smcrv2 *smcrv2); +int smc_ib_find_route(__be32 saddr, __be32 daddr, + u8 nexthop_mac[], u8 *uses_gateway); bool smc_ib_is_valid_local_systemid(void); int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); #endif diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 9cb2df289963..fd28cc498b98 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -23,6 +23,7 @@ struct smcd_dev_list smcd_dev_list = { }; static bool smc_ism_v2_capable; +static u8 smc_ism_v2_system_eid[SMC_MAX_EID_LEN]; /* Test if an ISM communication is possible - same CPC */ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) @@ -42,9 +43,12 @@ int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, return rc < 0 ? rc : 0; } -void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) +void smc_ism_get_system_eid(u8 **eid) { - smcd->ops->get_system_eid(smcd, eid); + if (!smc_ism_v2_capable) + *eid = NULL; + else + *eid = smc_ism_v2_system_eid; } u16 smc_ism_get_chid(struct smcd_dev *smcd) @@ -435,9 +439,12 @@ int smcd_register_dev(struct smcd_dev *smcd) if (list_empty(&smcd_dev_list.list)) { u8 *system_eid = NULL; - smc_ism_get_system_eid(smcd, &system_eid); - if (system_eid[24] != '0' || system_eid[28] != '0') + smcd->ops->get_system_eid(smcd, &system_eid); + if (system_eid[24] != '0' || system_eid[28] != '0') { smc_ism_v2_capable = true; + memcpy(smc_ism_v2_system_eid, system_eid, + SMC_MAX_EID_LEN); + } } /* sort list: devices without pnetid before devices with pnetid */ if (smcd->pnetid[0]) @@ -533,4 +540,5 @@ EXPORT_SYMBOL_GPL(smcd_handle_irq); void __init smc_ism_init(void) { smc_ism_v2_capable = false; + memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN); } diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 113efc7352ed..004b22a13ffa 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -48,7 +48,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, void *data, size_t len); int smc_ism_signal_shutdown(struct smc_link_group *lgr); -void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid); +void smc_ism_get_system_eid(u8 **eid); u16 smc_ism_get_chid(struct smcd_dev *dev); bool smc_ism_is_v2_capable(void); void smc_ism_init(void); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 72f4b72eb175..a9623c952007 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -23,16 +23,24 @@ struct smc_llc_hdr { struct smc_wr_rx_hdr common; - u8 length; /* 44 */ -#if defined(__BIG_ENDIAN_BITFIELD) - u8 reserved:4, - add_link_rej_rsn:4; + union { + struct { + u8 length; /* 44 */ + #if defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:4, + add_link_rej_rsn:4; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 add_link_rej_rsn:4, - reserved:4; + u8 add_link_rej_rsn:4, + reserved:4; #endif + }; + u16 length_v2; /* 44 - 8192*/ + }; u8 flags; -}; +} __packed; /* format defined in + * IBM Shared Memory Communications Version 2 + * (https://www.ibm.com/support/pages/node/6326337) + */ #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 @@ -76,6 +84,32 @@ struct smc_llc_msg_add_link_cont_rt { __be64 rmb_vaddr_new; }; +struct smc_llc_msg_add_link_v2_ext { +#if defined(__BIG_ENDIAN_BITFIELD) + u8 v2_direct : 1, + reserved : 7; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved : 7, + v2_direct : 1; +#endif + u8 reserved2; + u8 client_target_gid[SMC_GID_SIZE]; + u8 reserved3[8]; + u16 num_rkeys; + struct smc_llc_msg_add_link_cont_rt rt[]; +} __packed; /* format defined in + * IBM Shared Memory Communications Version 2 + * (https://www.ibm.com/support/pages/node/6326337) + */ + +struct smc_llc_msg_req_add_link_v2 { + struct smc_llc_hdr hd; + u8 reserved[20]; + u8 gid_cnt; + u8 reserved2[3]; + u8 gid[][SMC_GID_SIZE]; +}; + #define SMC_LLC_RKEYS_PER_CONT_MSG 2 struct smc_llc_msg_add_link_cont { /* type 0x03 */ @@ -114,7 +148,8 @@ struct smc_rmb_rtoken { __be64 rmb_vaddr; } __packed; /* format defined in RFC7609 */ -#define SMC_LLC_RKEYS_PER_MSG 3 +#define SMC_LLC_RKEYS_PER_MSG 3 +#define SMC_LLC_RKEYS_PER_MSG_V2 255 struct smc_llc_msg_confirm_rkey { /* type 0x06 */ struct smc_llc_hdr hd; @@ -135,9 +170,18 @@ struct smc_llc_msg_delete_rkey { /* type 0x09 */ u8 reserved2[4]; }; +struct smc_llc_msg_delete_rkey_v2 { /* type 0x29 */ + struct smc_llc_hdr hd; + u8 num_rkeys; + u8 num_inval_rkeys; + u8 reserved[2]; + __be32 rkey[]; +}; + union smc_llc_msg { struct smc_llc_msg_confirm_link confirm_link; struct smc_llc_msg_add_link add_link; + struct smc_llc_msg_req_add_link_v2 req_add_link; struct smc_llc_msg_add_link_cont add_link_cont; struct smc_llc_msg_del_link delete_link; @@ -189,7 +233,7 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, struct smc_llc_qentry *qentry) { - u8 msg_type = qentry->msg.raw.hdr.common.type; + u8 msg_type = qentry->msg.raw.hdr.common.llc_type; if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && flow_type != msg_type && !lgr->delayed_event) { @@ -219,7 +263,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, spin_unlock_bh(&lgr->llc_flow_lock); return false; } - switch (qentry->msg.raw.hdr.common.type) { + switch (qentry->msg.raw.hdr.common.llc_type) { case SMC_LLC_ADD_LINK: flow->type = SMC_LLC_FLOW_ADD_LINK; break; @@ -306,7 +350,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, smc_llc_flow_qentry_del(flow); goto out; } - rcv_msg = flow->qentry->msg.raw.hdr.common.type; + rcv_msg = flow->qentry->msg.raw.hdr.common.llc_type; if (exp_msg && rcv_msg != exp_msg) { if (exp_msg == SMC_LLC_ADD_LINK && rcv_msg == SMC_LLC_DELETE_LINK) { @@ -374,6 +418,30 @@ static int smc_llc_add_pending_send(struct smc_link *link, return 0; } +static int smc_llc_add_pending_send_v2(struct smc_link *link, + struct smc_wr_v2_buf **wr_buf, + struct smc_wr_tx_pend_priv **pend) +{ + int rc; + + rc = smc_wr_tx_get_v2_slot(link, smc_llc_tx_handler, wr_buf, pend); + if (rc < 0) + return rc; + return 0; +} + +static void smc_llc_init_msg_hdr(struct smc_llc_hdr *hdr, + struct smc_link_group *lgr, size_t len) +{ + if (lgr->smc_version == SMC_V2) { + hdr->common.llc_version = SMC_V2; + hdr->length_v2 = len; + } else { + hdr->common.llc_version = 0; + hdr->length = len; + } +} + /* high-level API to send LLC confirm link */ int smc_llc_send_confirm_link(struct smc_link *link, enum smc_llc_reqresp reqresp) @@ -390,8 +458,8 @@ int smc_llc_send_confirm_link(struct smc_link *link, goto put_out; confllc = (struct smc_llc_msg_confirm_link *)wr_buf; memset(confllc, 0, sizeof(*confllc)); - confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; - confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link); + confllc->hd.common.llc_type = SMC_LLC_CONFIRM_LINK; + smc_llc_init_msg_hdr(&confllc->hd, link->lgr, sizeof(*confllc)); confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; if (reqresp == SMC_LLC_RESP) confllc->hd.flags |= SMC_LLC_FLAG_RESP; @@ -426,8 +494,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, goto put_out; rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); - rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY; - rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey); + rkeyllc->hd.common.llc_type = SMC_LLC_CONFIRM_RKEY; + smc_llc_init_msg_hdr(&rkeyllc->hd, send_link->lgr, sizeof(*rkeyllc)); rtok_ix = 1; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { @@ -471,8 +539,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, goto put_out; rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); - rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; - rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey); + rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY; + smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc)); rkeyllc->num_rkeys = 1; rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); /* send llc message */ @@ -482,26 +550,116 @@ put_out: return rc; } +/* return first buffer from any of the next buf lists */ +static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr, + int *buf_lst) +{ + struct smc_buf_desc *buf_pos; + + while (*buf_lst < SMC_RMBE_SIZES) { + buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst], + struct smc_buf_desc, list); + if (buf_pos) + return buf_pos; + (*buf_lst)++; + } + return NULL; +} + +/* return next rmb from buffer lists */ +static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr, + int *buf_lst, + struct smc_buf_desc *buf_pos) +{ + struct smc_buf_desc *buf_next; + + if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) { + (*buf_lst)++; + return _smc_llc_get_next_rmb(lgr, buf_lst); + } + buf_next = list_next_entry(buf_pos, list); + return buf_next; +} + +static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr, + int *buf_lst) +{ + *buf_lst = 0; + return smc_llc_get_next_rmb(lgr, buf_lst, NULL); +} + +static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext, + struct smc_link *link, struct smc_link *link_new) +{ + struct smc_link_group *lgr = link->lgr; + struct smc_buf_desc *buf_pos; + int prim_lnk_idx, lnk_idx, i; + struct smc_buf_desc *rmb; + int len = sizeof(*ext); + int buf_lst; + + ext->v2_direct = !lgr->uses_gateway; + memcpy(ext->client_target_gid, link_new->gid, SMC_GID_SIZE); + + prim_lnk_idx = link->link_idx; + lnk_idx = link_new->link_idx; + mutex_lock(&lgr->rmbs_lock); + ext->num_rkeys = lgr->conns_num; + if (!ext->num_rkeys) + goto out; + buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst); + for (i = 0; i < ext->num_rkeys; i++) { + if (!buf_pos) + break; + rmb = buf_pos; + ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey); + ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey); + ext->rt[i].rmb_vaddr_new = + cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl)); + buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); + while (buf_pos && !(buf_pos)->used) + buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); + } + len += i * sizeof(ext->rt[0]); +out: + mutex_unlock(&lgr->rmbs_lock); + return len; +} + /* send ADD LINK request or response */ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], struct smc_link *link_new, enum smc_llc_reqresp reqresp) { + struct smc_llc_msg_add_link_v2_ext *ext = NULL; struct smc_llc_msg_add_link *addllc; struct smc_wr_tx_pend_priv *pend; - struct smc_wr_buf *wr_buf; + int len = sizeof(*addllc); int rc; if (!smc_wr_tx_link_hold(link)) return -ENOLINK; - rc = smc_llc_add_pending_send(link, &wr_buf, &pend); - if (rc) - goto put_out; - addllc = (struct smc_llc_msg_add_link *)wr_buf; + if (link->lgr->smc_version == SMC_V2) { + struct smc_wr_v2_buf *wr_buf; + + rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend); + if (rc) + goto put_out; + addllc = (struct smc_llc_msg_add_link *)wr_buf; + ext = (struct smc_llc_msg_add_link_v2_ext *) + &wr_buf->raw[sizeof(*addllc)]; + memset(ext, 0, SMC_WR_TX_SIZE); + } else { + struct smc_wr_buf *wr_buf; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + goto put_out; + addllc = (struct smc_llc_msg_add_link *)wr_buf; + } memset(addllc, 0, sizeof(*addllc)); - addllc->hd.common.type = SMC_LLC_ADD_LINK; - addllc->hd.length = sizeof(struct smc_llc_msg_add_link); + addllc->hd.common.llc_type = SMC_LLC_ADD_LINK; if (reqresp == SMC_LLC_RESP) addllc->hd.flags |= SMC_LLC_FLAG_RESP; memcpy(addllc->sender_mac, mac, ETH_ALEN); @@ -516,8 +674,14 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], addllc->qp_mtu = min(link_new->path_mtu, link_new->peer_mtu); } + if (ext && link_new) + len += smc_llc_fill_ext_v2(ext, link, link_new); + smc_llc_init_msg_hdr(&addllc->hd, link->lgr, len); /* send llc message */ - rc = smc_wr_tx_send(link, pend); + if (link->lgr->smc_version == SMC_V2) + rc = smc_wr_tx_v2_send(link, pend, len); + else + rc = smc_wr_tx_send(link, pend); put_out: smc_wr_tx_link_put(link); return rc; @@ -541,8 +705,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, delllc = (struct smc_llc_msg_del_link *)wr_buf; memset(delllc, 0, sizeof(*delllc)); - delllc->hd.common.type = SMC_LLC_DELETE_LINK; - delllc->hd.length = sizeof(struct smc_llc_msg_del_link); + delllc->hd.common.llc_type = SMC_LLC_DELETE_LINK; + smc_llc_init_msg_hdr(&delllc->hd, link->lgr, sizeof(*delllc)); if (reqresp == SMC_LLC_RESP) delllc->hd.flags |= SMC_LLC_FLAG_RESP; if (orderly) @@ -574,8 +738,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) goto put_out; testllc = (struct smc_llc_msg_test_link *)wr_buf; memset(testllc, 0, sizeof(*testllc)); - testllc->hd.common.type = SMC_LLC_TEST_LINK; - testllc->hd.length = sizeof(struct smc_llc_msg_test_link); + testllc->hd.common.llc_type = SMC_LLC_TEST_LINK; + smc_llc_init_msg_hdr(&testllc->hd, link->lgr, sizeof(*testllc)); memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); /* send llc message */ rc = smc_wr_tx_send(link, pend); @@ -651,44 +815,6 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, return -EMLINK; } -/* return first buffer from any of the next buf lists */ -static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr, - int *buf_lst) -{ - struct smc_buf_desc *buf_pos; - - while (*buf_lst < SMC_RMBE_SIZES) { - buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst], - struct smc_buf_desc, list); - if (buf_pos) - return buf_pos; - (*buf_lst)++; - } - return NULL; -} - -/* return next rmb from buffer lists */ -static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr, - int *buf_lst, - struct smc_buf_desc *buf_pos) -{ - struct smc_buf_desc *buf_next; - - if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) { - (*buf_lst)++; - return _smc_llc_get_next_rmb(lgr, buf_lst); - } - buf_next = list_next_entry(buf_pos, list); - return buf_next; -} - -static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr, - int *buf_lst) -{ - *buf_lst = 0; - return smc_llc_get_next_rmb(lgr, buf_lst, NULL); -} - /* send one add_link_continue msg */ static int smc_llc_add_link_cont(struct smc_link *link, struct smc_link *link_new, u8 *num_rkeys_todo, @@ -734,7 +860,7 @@ static int smc_llc_add_link_cont(struct smc_link *link, while (*buf_pos && !(*buf_pos)->used) *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); } - addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT; + addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT; addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont); if (lgr->role == SMC_CLNT) addc_llc->hd.flags |= SMC_LLC_FLAG_RESP; @@ -793,6 +919,8 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry) qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; + smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr, + sizeof(qentry->msg)); return smc_llc_send_message(qentry->link, &qentry->msg); } @@ -813,7 +941,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link, SMC_LLC_DEL_LOST_PATH); return -ENOLINK; } - if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { + if (qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) { /* received DELETE_LINK instead */ qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, &qentry->msg); @@ -854,6 +982,26 @@ static int smc_llc_cli_conf_link(struct smc_link *link, return 0; } +static void smc_llc_save_add_link_rkeys(struct smc_link *link, + struct smc_link *link_new) +{ + struct smc_llc_msg_add_link_v2_ext *ext; + struct smc_link_group *lgr = link->lgr; + int max, i; + + ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 + + SMC_WR_TX_SIZE); + max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2); + mutex_lock(&lgr->rmbs_lock); + for (i = 0; i < max; i++) { + smc_rtoken_set(lgr, link->link_idx, link_new->link_idx, + ext->rt[i].rmb_key, + ext->rt[i].rmb_vaddr_new, + ext->rt[i].rmb_key_new); + } + mutex_unlock(&lgr->rmbs_lock); +} + static void smc_llc_save_add_link_info(struct smc_link *link, struct smc_llc_msg_add_link *add_llc) { @@ -870,31 +1018,47 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) struct smc_llc_msg_add_link *llc = &qentry->msg.add_link; enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; struct smc_link_group *lgr = smc_get_lgr(link); + struct smc_init_info *ini = NULL; struct smc_link *lnk_new = NULL; - struct smc_init_info ini; int lnk_idx, rc = 0; if (!llc->qp_mtu) goto out_reject; - ini.vlan_id = lgr->vlan_id; - smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) { + rc = -ENOMEM; + goto out_reject; + } + + ini->vlan_id = lgr->vlan_id; + if (lgr->smc_version == SMC_V2) { + ini->check_smcrv2 = true; + ini->smcrv2.saddr = lgr->saddr; + ini->smcrv2.daddr = smc_ib_gid_to_ipv4(llc->sender_gid); + } + smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && - !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) { - if (!ini.ib_dev) + (lgr->smc_version == SMC_V2 || + !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN))) { + if (!ini->ib_dev && !ini->smcrv2.ib_dev_v2) goto out_reject; lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; } - if (!ini.ib_dev) { + if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) { lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; - ini.ib_dev = link->smcibdev; - ini.ib_port = link->ibport; + ini->smcrv2.ib_dev_v2 = link->smcibdev; + ini->smcrv2.ib_port_v2 = link->ibport; + } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) { + lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; + ini->ib_dev = link->smcibdev; + ini->ib_port = link->ibport; } lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); if (lnk_idx < 0) goto out_reject; lnk_new = &lgr->lnk[lnk_idx]; - rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini); + rc = smcr_link_init(lgr, lnk_new, lnk_idx, ini); if (rc) goto out_reject; smc_llc_save_add_link_info(lnk_new, llc); @@ -910,16 +1074,20 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) goto out_clear_lnk; rc = smc_llc_send_add_link(link, - lnk_new->smcibdev->mac[ini.ib_port - 1], + lnk_new->smcibdev->mac[lnk_new->ibport - 1], lnk_new->gid, lnk_new, SMC_LLC_RESP); if (rc) goto out_clear_lnk; - rc = smc_llc_cli_rkey_exchange(link, lnk_new); - if (rc) { - rc = 0; - goto out_clear_lnk; + if (lgr->smc_version == SMC_V2) { + smc_llc_save_add_link_rkeys(link, lnk_new); + } else { + rc = smc_llc_cli_rkey_exchange(link, lnk_new); + if (rc) { + rc = 0; + goto out_clear_lnk; + } } - rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t); + rc = smc_llc_cli_conf_link(link, ini, lnk_new, lgr_new_t); if (!rc) goto out; out_clear_lnk: @@ -928,29 +1096,78 @@ out_clear_lnk: out_reject: smc_llc_cli_add_link_reject(qentry); out: + kfree(ini); kfree(qentry); return rc; } +static void smc_llc_send_request_add_link(struct smc_link *link) +{ + struct smc_llc_msg_req_add_link_v2 *llc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_v2_buf *wr_buf; + struct smc_gidlist gidlist; + int rc, len, i; + + if (!smc_wr_tx_link_hold(link)) + return; + if (link->lgr->type == SMC_LGR_SYMMETRIC || + link->lgr->type == SMC_LGR_ASYMMETRIC_PEER) + goto put_out; + + smc_fill_gid_list(link->lgr, &gidlist, link->smcibdev, link->gid); + if (gidlist.len <= 1) + goto put_out; + + rc = smc_llc_add_pending_send_v2(link, &wr_buf, &pend); + if (rc) + goto put_out; + llc = (struct smc_llc_msg_req_add_link_v2 *)wr_buf; + memset(llc, 0, SMC_WR_TX_SIZE); + + llc->hd.common.llc_type = SMC_LLC_REQ_ADD_LINK; + for (i = 0; i < gidlist.len; i++) + memcpy(llc->gid[i], gidlist.list[i], sizeof(gidlist.list[0])); + llc->gid_cnt = gidlist.len; + len = sizeof(*llc) + (gidlist.len * sizeof(gidlist.list[0])); + smc_llc_init_msg_hdr(&llc->hd, link->lgr, len); + rc = smc_wr_tx_v2_send(link, pend, len); + if (!rc) + /* set REQ_ADD_LINK flow and wait for response from peer */ + link->lgr->llc_flow_lcl.type = SMC_LLC_FLOW_REQ_ADD_LINK; +put_out: + smc_wr_tx_link_put(link); +} + /* as an SMC client, invite server to start the add_link processing */ static void smc_llc_cli_add_link_invite(struct smc_link *link, struct smc_llc_qentry *qentry) { struct smc_link_group *lgr = smc_get_lgr(link); - struct smc_init_info ini; + struct smc_init_info *ini = NULL; + + if (lgr->smc_version == SMC_V2) { + smc_llc_send_request_add_link(link); + goto out; + } if (lgr->type == SMC_LGR_SYMMETRIC || lgr->type == SMC_LGR_ASYMMETRIC_PEER) goto out; - ini.vlan_id = lgr->vlan_id; - smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); - if (!ini.ib_dev) + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) + goto out; + + ini->vlan_id = lgr->vlan_id; + smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); + if (!ini->ib_dev) goto out; - smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1], - ini.ib_gid, NULL, SMC_LLC_REQ); + smc_llc_send_add_link(link, ini->ib_dev->mac[ini->ib_port - 1], + ini->ib_gid, NULL, SMC_LLC_REQ); out: + kfree(ini); kfree(qentry); } @@ -966,7 +1183,7 @@ static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc) static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) { - if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK && + if (llc->raw.hdr.common.llc_type == SMC_LLC_ADD_LINK && smc_llc_is_empty_llc_message(llc)) return true; return false; @@ -1133,7 +1350,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link, /* receive CONFIRM LINK response over the RoCE fabric */ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0); if (!qentry || - qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { + qentry->msg.raw.hdr.common.llc_type != SMC_LLC_CONFIRM_LINK) { /* send DELETE LINK */ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, false, SMC_LLC_DEL_LOST_PATH); @@ -1152,37 +1369,80 @@ static int smc_llc_srv_conf_link(struct smc_link *link, return 0; } -int smc_llc_srv_add_link(struct smc_link *link) +static void smc_llc_send_req_add_link_response(struct smc_llc_qentry *qentry) +{ + qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; + smc_llc_init_msg_hdr(&qentry->msg.raw.hdr, qentry->link->lgr, + sizeof(qentry->msg)); + memset(&qentry->msg.raw.data, 0, sizeof(qentry->msg.raw.data)); + smc_llc_send_message(qentry->link, &qentry->msg); +} + +int smc_llc_srv_add_link(struct smc_link *link, + struct smc_llc_qentry *req_qentry) { enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; struct smc_link_group *lgr = link->lgr; struct smc_llc_msg_add_link *add_llc; struct smc_llc_qentry *qentry = NULL; - struct smc_link *link_new; - struct smc_init_info ini; + bool send_req_add_link_resp = false; + struct smc_link *link_new = NULL; + struct smc_init_info *ini = NULL; int lnk_idx, rc = 0; + if (req_qentry && + req_qentry->msg.raw.hdr.common.llc_type == SMC_LLC_REQ_ADD_LINK) + send_req_add_link_resp = true; + + ini = kzalloc(sizeof(*ini), GFP_KERNEL); + if (!ini) { + rc = -ENOMEM; + goto out; + } + /* ignore client add link recommendation, start new flow */ - ini.vlan_id = lgr->vlan_id; - smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); - if (!ini.ib_dev) { + ini->vlan_id = lgr->vlan_id; + if (lgr->smc_version == SMC_V2) { + ini->check_smcrv2 = true; + ini->smcrv2.saddr = lgr->saddr; + if (send_req_add_link_resp) { + struct smc_llc_msg_req_add_link_v2 *req_add = + &req_qentry->msg.req_add_link; + + ini->smcrv2.daddr = smc_ib_gid_to_ipv4(req_add->gid[0]); + } + } + smc_pnet_find_alt_roce(lgr, ini, link->smcibdev); + if (lgr->smc_version == SMC_V2 && !ini->smcrv2.ib_dev_v2) { + lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; + ini->smcrv2.ib_dev_v2 = link->smcibdev; + ini->smcrv2.ib_port_v2 = link->ibport; + } else if (lgr->smc_version < SMC_V2 && !ini->ib_dev) { lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; - ini.ib_dev = link->smcibdev; - ini.ib_port = link->ibport; + ini->ib_dev = link->smcibdev; + ini->ib_port = link->ibport; } lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); - if (lnk_idx < 0) - return 0; + if (lnk_idx < 0) { + rc = 0; + goto out; + } - rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini); + rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, ini); if (rc) - return rc; + goto out; link_new = &lgr->lnk[lnk_idx]; + + rc = smcr_buf_map_lgr(link_new); + if (rc) + goto out_err; + rc = smc_llc_send_add_link(link, - link_new->smcibdev->mac[ini.ib_port - 1], + link_new->smcibdev->mac[link_new->ibport-1], link_new->gid, link_new, SMC_LLC_REQ); if (rc) goto out_err; + send_req_add_link_resp = false; /* receive ADD LINK response over the RoCE fabric */ qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK); if (!qentry) { @@ -1197,7 +1457,8 @@ int smc_llc_srv_add_link(struct smc_link *link) } if (lgr->type == SMC_LGR_SINGLE && (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && - !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) { + (lgr->smc_version == SMC_V2 || + !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN)))) { lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; } smc_llc_save_add_link_info(link_new, add_llc); @@ -1206,39 +1467,49 @@ int smc_llc_srv_add_link(struct smc_link *link) rc = smc_ib_ready_link(link_new); if (rc) goto out_err; - rc = smcr_buf_map_lgr(link_new); - if (rc) - goto out_err; rc = smcr_buf_reg_lgr(link_new); if (rc) goto out_err; - rc = smc_llc_srv_rkey_exchange(link, link_new); - if (rc) - goto out_err; + if (lgr->smc_version == SMC_V2) { + smc_llc_save_add_link_rkeys(link, link_new); + } else { + rc = smc_llc_srv_rkey_exchange(link, link_new); + if (rc) + goto out_err; + } rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); if (rc) goto out_err; + kfree(ini); return 0; out_err: - link_new->state = SMC_LNK_INACTIVE; - smcr_link_clear(link_new, false); + if (link_new) { + link_new->state = SMC_LNK_INACTIVE; + smcr_link_clear(link_new, false); + } +out: + kfree(ini); + if (send_req_add_link_resp) + smc_llc_send_req_add_link_response(req_qentry); return rc; } static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) { struct smc_link *link = lgr->llc_flow_lcl.qentry->link; + struct smc_llc_qentry *qentry; int rc; - smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); + qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); mutex_lock(&lgr->llc_conf_mutex); - rc = smc_llc_srv_add_link(link); + rc = smc_llc_srv_add_link(link, qentry); if (!rc && lgr->type == SMC_LGR_SYMMETRIC) { /* delete any asymmetric link */ smc_llc_delete_asym_link(lgr); } mutex_unlock(&lgr->llc_conf_mutex); + kfree(qentry); } /* enqueue a local add_link req to trigger a new add_link flow */ @@ -1246,8 +1517,8 @@ void smc_llc_add_link_local(struct smc_link *link) { struct smc_llc_msg_add_link add_llc = {}; - add_llc.hd.length = sizeof(add_llc); - add_llc.hd.common.type = SMC_LLC_ADD_LINK; + add_llc.hd.common.llc_type = SMC_LLC_ADD_LINK; + smc_llc_init_msg_hdr(&add_llc.hd, link->lgr, sizeof(add_llc)); /* no dev and port needed */ smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc); } @@ -1269,7 +1540,8 @@ static void smc_llc_add_link_work(struct work_struct *work) else smc_llc_process_srv_add_link(lgr); out: - smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); + if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_REQ_ADD_LINK) + smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); } /* enqueue a local del_link msg to trigger a new del_link flow, @@ -1279,8 +1551,8 @@ void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id) { struct smc_llc_msg_del_link del_llc = {}; - del_llc.hd.length = sizeof(del_llc); - del_llc.hd.common.type = SMC_LLC_DELETE_LINK; + del_llc.hd.common.llc_type = SMC_LLC_DELETE_LINK; + smc_llc_init_msg_hdr(&del_llc.hd, link->lgr, sizeof(del_llc)); del_llc.link_num = del_link_id; del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH); del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; @@ -1350,8 +1622,8 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) struct smc_llc_msg_del_link delllc = {}; int i; - delllc.hd.common.type = SMC_LLC_DELETE_LINK; - delllc.hd.length = sizeof(delllc); + delllc.hd.common.llc_type = SMC_LLC_DELETE_LINK; + smc_llc_init_msg_hdr(&delllc.hd, lgr, sizeof(delllc)); if (ord) delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; @@ -1467,6 +1739,8 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr) link = qentry->link; num_entries = llc->rtoken[0].num_rkeys; + if (num_entries > SMC_LLC_RKEYS_PER_MSG) + goto out_err; /* first rkey entry is for receiving link */ rk_idx = smc_rtoken_add(link, llc->rtoken[0].rmb_vaddr, @@ -1485,6 +1759,7 @@ out_err: llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY; out: llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc)); smc_llc_send_message(link, &qentry->msg); smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); } @@ -1502,6 +1777,28 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) llc = &qentry->msg.delete_rkey; link = qentry->link; + if (lgr->smc_version == SMC_V2) { + struct smc_llc_msg_delete_rkey_v2 *llcv2; + + memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc)); + llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2; + llcv2->num_inval_rkeys = 0; + + max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2); + for (i = 0; i < max; i++) { + if (smc_rtoken_delete(link, llcv2->rkey[i])) + llcv2->num_inval_rkeys++; + } + memset(&llc->rkey[0], 0, sizeof(llc->rkey)); + memset(&llc->reserved2, 0, sizeof(llc->reserved2)); + smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc)); + if (llcv2->num_inval_rkeys) { + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + llc->err_mask = llcv2->num_inval_rkeys; + } + goto finish; + } + max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); for (i = 0; i < max; i++) { if (smc_rtoken_delete(link, llc->rkey[i])) @@ -1511,6 +1808,7 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; llc->err_mask = err_mask; } +finish: llc->hd.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, &qentry->msg); smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); @@ -1546,7 +1844,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (!smc_link_usable(link)) goto out; - switch (llc->raw.hdr.common.type) { + switch (llc->raw.hdr.common.llc_type) { case SMC_LLC_TEST_LINK: llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, llc); @@ -1571,8 +1869,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); wake_up(&lgr->llc_msg_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { + return; + } + if (lgr->llc_flow_lcl.type == + SMC_LLC_FLOW_REQ_ADD_LINK) { + /* server started add_link processing */ + lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK; + smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, + qentry); + schedule_work(&lgr->llc_add_link_work); + return; + } + if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_add_link_work); } } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { @@ -1620,6 +1928,23 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; + case SMC_LLC_REQ_ADD_LINK: + /* handle response here, smc_llc_flow_stop() cannot be called + * in tasklet context + */ + if (lgr->role == SMC_CLNT && + lgr->llc_flow_lcl.type == SMC_LLC_FLOW_REQ_ADD_LINK && + (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP)) { + smc_llc_flow_stop(link->lgr, &lgr->llc_flow_lcl); + } else if (lgr->role == SMC_SERV) { + if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { + /* as smc server, handle client suggestion */ + lgr->llc_flow_lcl.type = SMC_LLC_FLOW_ADD_LINK; + schedule_work(&lgr->llc_add_link_work); + } + return; + } + break; default: smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); break; @@ -1663,7 +1988,7 @@ static void smc_llc_rx_response(struct smc_link *link, { enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; - u8 llc_type = qentry->msg.raw.hdr.common.type; + u8 llc_type = qentry->msg.raw.hdr.common.llc_type; switch (llc_type) { case SMC_LLC_TEST_LINK: @@ -1689,7 +2014,8 @@ static void smc_llc_rx_response(struct smc_link *link, /* not used because max links is 3 */ break; default: - smc_llc_protocol_violation(link->lgr, llc_type); + smc_llc_protocol_violation(link->lgr, + qentry->msg.raw.hdr.common.type); break; } kfree(qentry); @@ -1714,7 +2040,8 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg)); /* process responses immediately */ - if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) { + if ((llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) && + llc->raw.hdr.common.llc_type != SMC_LLC_REQ_ADD_LINK) { smc_llc_rx_response(link, qentry); return; } @@ -1734,8 +2061,13 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) if (wc->byte_len < sizeof(*llc)) return; /* short message */ - if (llc->raw.hdr.length != sizeof(*llc)) - return; /* invalid message */ + if (!llc->raw.hdr.common.llc_version) { + if (llc->raw.hdr.length != sizeof(*llc)) + return; /* invalid message */ + } else { + if (llc->raw.hdr.length_v2 < sizeof(*llc)) + return; /* invalid message */ + } smc_llc_enqueue(link, llc); } @@ -1954,6 +2286,35 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { .handler = smc_llc_rx_handler, .type = SMC_LLC_DELETE_RKEY }, + /* V2 types */ + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_TEST_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_ADD_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_REQ_ADD_LINK_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY_V2 + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_RKEY_V2 + }, { .handler = NULL, } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index cc00a2ec4e92..4404e52b3346 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -30,10 +30,19 @@ enum smc_llc_msg_type { SMC_LLC_ADD_LINK = 0x02, SMC_LLC_ADD_LINK_CONT = 0x03, SMC_LLC_DELETE_LINK = 0x04, + SMC_LLC_REQ_ADD_LINK = 0x05, SMC_LLC_CONFIRM_RKEY = 0x06, SMC_LLC_TEST_LINK = 0x07, SMC_LLC_CONFIRM_RKEY_CONT = 0x08, SMC_LLC_DELETE_RKEY = 0x09, + /* V2 types */ + SMC_LLC_CONFIRM_LINK_V2 = 0x21, + SMC_LLC_ADD_LINK_V2 = 0x22, + SMC_LLC_DELETE_LINK_V2 = 0x24, + SMC_LLC_REQ_ADD_LINK_V2 = 0x25, + SMC_LLC_CONFIRM_RKEY_V2 = 0x26, + SMC_LLC_TEST_LINK_V2 = 0x27, + SMC_LLC_DELETE_RKEY_V2 = 0x29, }; #define smc_link_downing(state) \ @@ -102,7 +111,8 @@ void smc_llc_flow_qentry_del(struct smc_llc_flow *flow); void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); -int smc_llc_srv_add_link(struct smc_link *link); +int smc_llc_srv_add_link(struct smc_link *link, + struct smc_llc_qentry *req_qentry); void smc_llc_add_link_local(struct smc_link *link); int smc_llc_init(void) __init; diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c index 6fb6f96c1d17..f13ab0661ed5 100644 --- a/net/smc/smc_netlink.c +++ b/net/smc/smc_netlink.c @@ -19,11 +19,19 @@ #include "smc_core.h" #include "smc_ism.h" #include "smc_ib.h" +#include "smc_clc.h" #include "smc_stats.h" #include "smc_netlink.h" -#define SMC_CMD_MAX_ATTR 1 +const struct nla_policy +smc_gen_ueid_policy[SMC_NLA_EID_TABLE_MAX + 1] = { + [SMC_NLA_EID_TABLE_UNSPEC] = { .type = NLA_UNSPEC }, + [SMC_NLA_EID_TABLE_ENTRY] = { .type = NLA_STRING, + .len = SMC_MAX_EID_LEN, + }, +}; +#define SMC_CMD_MAX_ATTR 1 /* SMC_GENL generic netlink operation definition */ static const struct genl_ops smc_gen_nl_ops[] = { { @@ -66,6 +74,43 @@ static const struct genl_ops smc_gen_nl_ops[] = { /* can be retrieved by unprivileged users */ .dumpit = smc_nl_get_fback_stats, }, + { + .cmd = SMC_NETLINK_DUMP_UEID, + /* can be retrieved by unprivileged users */ + .dumpit = smc_nl_dump_ueid, + }, + { + .cmd = SMC_NETLINK_ADD_UEID, + .flags = GENL_ADMIN_PERM, + .doit = smc_nl_add_ueid, + .policy = smc_gen_ueid_policy, + }, + { + .cmd = SMC_NETLINK_REMOVE_UEID, + .flags = GENL_ADMIN_PERM, + .doit = smc_nl_remove_ueid, + .policy = smc_gen_ueid_policy, + }, + { + .cmd = SMC_NETLINK_FLUSH_UEID, + .flags = GENL_ADMIN_PERM, + .doit = smc_nl_flush_ueid, + }, + { + .cmd = SMC_NETLINK_DUMP_SEID, + /* can be retrieved by unprivileged users */ + .dumpit = smc_nl_dump_seid, + }, + { + .cmd = SMC_NETLINK_ENABLE_SEID, + .flags = GENL_ADMIN_PERM, + .doit = smc_nl_enable_seid, + }, + { + .cmd = SMC_NETLINK_DISABLE_SEID, + .flags = GENL_ADMIN_PERM, + .doit = smc_nl_disable_seid, + }, }; static const struct nla_policy smc_gen_nl_policy[2] = { diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h index 5ce2c0a89ccd..e8c6c3f0e98c 100644 --- a/net/smc/smc_netlink.h +++ b/net/smc/smc_netlink.h @@ -17,6 +17,8 @@ extern struct genl_family smc_gen_nl_family; +extern const struct nla_policy smc_gen_ueid_policy[]; + struct smc_nl_dmp_ctx { int pos[3]; }; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 4a964e9190b0..67e9d9fde085 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -953,6 +953,26 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, return rc; } +static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i, + struct smc_init_info *ini) +{ + if (!ini->check_smcrv2 && + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL, + NULL)) { + ini->ib_dev = ibdev; + ini->ib_port = i; + return 0; + } + if (ini->check_smcrv2 && + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2, + NULL, &ini->smcrv2)) { + ini->smcrv2.ib_dev_v2 = ibdev; + ini->smcrv2.ib_port_v2 = i; + return 0; + } + return -ENODEV; +} + /* find a roce device for the given pnetid */ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_init_info *ini, @@ -961,7 +981,6 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_ib_device *ibdev; int i; - ini->ib_dev = NULL; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (ibdev == known_dev) @@ -971,12 +990,9 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, continue; if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && smc_ib_port_active(ibdev, i) && - !test_bit(i - 1, ibdev->ports_going_away) && - !smc_ib_determine_gid(ibdev, i, ini->vlan_id, - ini->ib_gid, NULL)) { - ini->ib_dev = ibdev; - ini->ib_port = i; - goto out; + !test_bit(i - 1, ibdev->ports_going_away)) { + if (!smc_pnet_determine_gid(ibdev, i, ini)) + goto out; } } } @@ -1016,12 +1032,9 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, dev_put(ndev); if (netdev == ndev && smc_ib_port_active(ibdev, i) && - !test_bit(i - 1, ibdev->ports_going_away) && - !smc_ib_determine_gid(ibdev, i, ini->vlan_id, - ini->ib_gid, NULL)) { - ini->ib_dev = ibdev; - ini->ib_port = i; - break; + !test_bit(i - 1, ibdev->ports_going_away)) { + if (!smc_pnet_determine_gid(ibdev, i, ini)) + break; } } } @@ -1083,8 +1096,6 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); - ini->ib_dev = NULL; - ini->ib_port = 0; if (!dst) goto out; if (!dst->dev) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index a419e9af36b9..600ab5889227 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -101,19 +101,32 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); - if (pnd_snd_idx == link->wr_tx_cnt) - return; - link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; - if (link->wr_tx_pends[pnd_snd_idx].compl_requested) - complete(&link->wr_tx_compl[pnd_snd_idx]); - memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd)); - /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pnd_snd_idx], 0, - sizeof(link->wr_tx_pends[pnd_snd_idx])); - memset(&link->wr_tx_bufs[pnd_snd_idx], 0, - sizeof(link->wr_tx_bufs[pnd_snd_idx])); - if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) - return; + if (pnd_snd_idx == link->wr_tx_cnt) { + if (link->lgr->smc_version != SMC_V2 || + link->wr_tx_v2_pend->wr_id != wc->wr_id) + return; + link->wr_tx_v2_pend->wc_status = wc->status; + memcpy(&pnd_snd, link->wr_tx_v2_pend, sizeof(pnd_snd)); + /* clear the full struct smc_wr_tx_pend including .priv */ + memset(link->wr_tx_v2_pend, 0, + sizeof(*link->wr_tx_v2_pend)); + memset(link->lgr->wr_tx_buf_v2, 0, + sizeof(*link->lgr->wr_tx_buf_v2)); + } else { + link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; + if (link->wr_tx_pends[pnd_snd_idx].compl_requested) + complete(&link->wr_tx_compl[pnd_snd_idx]); + memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], + sizeof(pnd_snd)); + /* clear the full struct smc_wr_tx_pend including .priv */ + memset(&link->wr_tx_pends[pnd_snd_idx], 0, + sizeof(link->wr_tx_pends[pnd_snd_idx])); + memset(&link->wr_tx_bufs[pnd_snd_idx], 0, + sizeof(link->wr_tx_bufs[pnd_snd_idx])); + if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) + return; + } + if (wc->status) { for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { /* clear full struct smc_wr_tx_pend including .priv */ @@ -123,6 +136,12 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) sizeof(link->wr_tx_bufs[i])); clear_bit(i, link->wr_tx_mask); } + if (link->lgr->smc_version == SMC_V2) { + memset(link->wr_tx_v2_pend, 0, + sizeof(*link->wr_tx_v2_pend)); + memset(link->lgr->wr_tx_buf_v2, 0, + sizeof(*link->lgr->wr_tx_buf_v2)); + } /* terminate link */ smcr_link_down_cond_sched(link); } @@ -239,6 +258,33 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, return 0; } +int smc_wr_tx_get_v2_slot(struct smc_link *link, + smc_wr_tx_handler handler, + struct smc_wr_v2_buf **wr_buf, + struct smc_wr_tx_pend_priv **wr_pend_priv) +{ + struct smc_wr_tx_pend *wr_pend; + struct ib_send_wr *wr_ib; + u64 wr_id; + + if (link->wr_tx_v2_pend->idx == link->wr_tx_cnt) + return -EBUSY; + + *wr_buf = NULL; + *wr_pend_priv = NULL; + wr_id = smc_wr_tx_get_next_wr_id(link); + wr_pend = link->wr_tx_v2_pend; + wr_pend->wr_id = wr_id; + wr_pend->handler = handler; + wr_pend->link = link; + wr_pend->idx = link->wr_tx_cnt; + wr_ib = link->wr_tx_v2_ib; + wr_ib->wr_id = wr_id; + *wr_buf = link->lgr->wr_tx_buf_v2; + *wr_pend_priv = &wr_pend->priv; + return 0; +} + int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv) { @@ -256,6 +302,14 @@ int smc_wr_tx_put_slot(struct smc_link *link, test_and_clear_bit(idx, link->wr_tx_mask); wake_up(&link->wr_tx_wait); return 1; + } else if (link->lgr->smc_version == SMC_V2 && + pend->idx == link->wr_tx_cnt) { + /* Large v2 buffer */ + memset(&link->wr_tx_v2_pend, 0, + sizeof(link->wr_tx_v2_pend)); + memset(&link->lgr->wr_tx_buf_v2, 0, + sizeof(link->lgr->wr_tx_buf_v2)); + return 1; } return 0; @@ -280,6 +334,22 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } +int smc_wr_tx_v2_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, + int len) +{ + int rc; + + link->wr_tx_v2_ib->sg_list[0].length = len; + ib_req_notify_cq(link->smcibdev->roce_cq_send, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + rc = ib_post_send(link->roce_qp, link->wr_tx_v2_ib, NULL); + if (rc) { + smc_wr_tx_put_slot(link, priv); + smcr_link_down_cond_sched(link); + } + return rc; +} + /* Send prepared WR slot via ib_post_send and wait for send completion * notification. * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer @@ -517,6 +587,7 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) static void smc_wr_init_sge(struct smc_link *lnk) { + int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; u32 i; for (i = 0; i < lnk->wr_tx_cnt; i++) { @@ -545,14 +616,44 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; } + + if (lnk->lgr->smc_version == SMC_V2) { + lnk->wr_tx_v2_sge->addr = lnk->wr_tx_v2_dma_addr; + lnk->wr_tx_v2_sge->length = SMC_WR_BUF_V2_SIZE; + lnk->wr_tx_v2_sge->lkey = lnk->roce_pd->local_dma_lkey; + + lnk->wr_tx_v2_ib->next = NULL; + lnk->wr_tx_v2_ib->sg_list = lnk->wr_tx_v2_sge; + lnk->wr_tx_v2_ib->num_sge = 1; + lnk->wr_tx_v2_ib->opcode = IB_WR_SEND; + lnk->wr_tx_v2_ib->send_flags = + IB_SEND_SIGNALED | IB_SEND_SOLICITED; + } + + /* With SMC-Rv2 there can be messages larger than SMC_WR_TX_SIZE. + * Each ib_recv_wr gets 2 sges, the second one is a spillover buffer + * and the same buffer for all sges. When a larger message arrived then + * the content of the first small sge is copied to the beginning of + * the larger spillover buffer, allowing easy data mapping. + */ for (i = 0; i < lnk->wr_rx_cnt; i++) { - lnk->wr_rx_sges[i].addr = + int x = i * sges_per_buf; + + lnk->wr_rx_sges[x].addr = lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE; - lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE; - lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; + lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE; + lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey; + if (lnk->lgr->smc_version == SMC_V2) { + lnk->wr_rx_sges[x + 1].addr = + lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE; + lnk->wr_rx_sges[x + 1].length = + SMC_WR_BUF_V2_SIZE - SMC_WR_TX_SIZE; + lnk->wr_rx_sges[x + 1].lkey = + lnk->roce_pd->local_dma_lkey; + } lnk->wr_rx_ibs[i].next = NULL; - lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; - lnk->wr_rx_ibs[i].num_sge = 1; + lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x]; + lnk->wr_rx_ibs[i].num_sge = sges_per_buf; } lnk->wr_reg.wr.next = NULL; lnk->wr_reg.wr.num_sge = 0; @@ -585,16 +686,45 @@ void smc_wr_free_link(struct smc_link *lnk) DMA_FROM_DEVICE); lnk->wr_rx_dma_addr = 0; } + if (lnk->wr_rx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_FROM_DEVICE); + lnk->wr_rx_v2_dma_addr = 0; + } if (lnk->wr_tx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, DMA_TO_DEVICE); lnk->wr_tx_dma_addr = 0; } + if (lnk->wr_tx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_TO_DEVICE); + lnk->wr_tx_v2_dma_addr = 0; + } +} + +void smc_wr_free_lgr_mem(struct smc_link_group *lgr) +{ + if (lgr->smc_version < SMC_V2) + return; + + kfree(lgr->wr_rx_buf_v2); + lgr->wr_rx_buf_v2 = NULL; + kfree(lgr->wr_tx_buf_v2); + lgr->wr_tx_buf_v2 = NULL; } void smc_wr_free_link_mem(struct smc_link *lnk) { + kfree(lnk->wr_tx_v2_ib); + lnk->wr_tx_v2_ib = NULL; + kfree(lnk->wr_tx_v2_sge); + lnk->wr_tx_v2_sge = NULL; + kfree(lnk->wr_tx_v2_pend); + lnk->wr_tx_v2_pend = NULL; kfree(lnk->wr_tx_compl); lnk->wr_tx_compl = NULL; kfree(lnk->wr_tx_pends); @@ -619,8 +749,26 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_rx_bufs = NULL; } +int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr) +{ + if (lgr->smc_version < SMC_V2) + return 0; + + lgr->wr_rx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL); + if (!lgr->wr_rx_buf_v2) + return -ENOMEM; + lgr->wr_tx_buf_v2 = kzalloc(SMC_WR_BUF_V2_SIZE, GFP_KERNEL); + if (!lgr->wr_tx_buf_v2) { + kfree(lgr->wr_rx_buf_v2); + return -ENOMEM; + } + return 0; +} + int smc_wr_alloc_link_mem(struct smc_link *link) { + int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1; + /* allocate link related memory */ link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) @@ -653,7 +801,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, - sizeof(link->wr_rx_sges[0]), + sizeof(link->wr_rx_sges[0]) * sges_per_buf, GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; @@ -672,8 +820,29 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_tx_compl) goto no_mem_wr_tx_pends; + + if (link->lgr->smc_version == SMC_V2) { + link->wr_tx_v2_ib = kzalloc(sizeof(*link->wr_tx_v2_ib), + GFP_KERNEL); + if (!link->wr_tx_v2_ib) + goto no_mem_tx_compl; + link->wr_tx_v2_sge = kzalloc(sizeof(*link->wr_tx_v2_sge), + GFP_KERNEL); + if (!link->wr_tx_v2_sge) + goto no_mem_v2_ib; + link->wr_tx_v2_pend = kzalloc(sizeof(*link->wr_tx_v2_pend), + GFP_KERNEL); + if (!link->wr_tx_v2_pend) + goto no_mem_v2_sge; + } return 0; +no_mem_v2_sge: + kfree(link->wr_tx_v2_sge); +no_mem_v2_ib: + kfree(link->wr_tx_v2_ib); +no_mem_tx_compl: + kfree(link->wr_tx_compl); no_mem_wr_tx_pends: kfree(link->wr_tx_pends); no_mem_wr_tx_mask: @@ -725,6 +894,24 @@ int smc_wr_create_link(struct smc_link *lnk) rc = -EIO; goto out; } + if (lnk->lgr->smc_version == SMC_V2) { + lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev, + lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) { + lnk->wr_rx_v2_dma_addr = 0; + rc = -EIO; + goto dma_unmap; + } + lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev, + lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(ibdev, lnk->wr_tx_v2_dma_addr)) { + lnk->wr_tx_v2_dma_addr = 0; + rc = -EIO; + goto dma_unmap; + } + } lnk->wr_tx_dma_addr = ib_dma_map_single( ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, DMA_TO_DEVICE); @@ -742,6 +929,18 @@ int smc_wr_create_link(struct smc_link *lnk) return rc; dma_unmap: + if (lnk->wr_rx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_rx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_FROM_DEVICE); + lnk->wr_rx_v2_dma_addr = 0; + } + if (lnk->wr_tx_v2_dma_addr) { + ib_dma_unmap_single(ibdev, lnk->wr_tx_v2_dma_addr, + SMC_WR_BUF_V2_SIZE, + DMA_TO_DEVICE); + lnk->wr_tx_v2_dma_addr = 0; + } ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, DMA_FROM_DEVICE); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 2bc626f230a5..f353311e6f84 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -101,8 +101,10 @@ static inline int smc_wr_rx_post(struct smc_link *link) int smc_wr_create_link(struct smc_link *lnk); int smc_wr_alloc_link_mem(struct smc_link *lnk); +int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr); void smc_wr_free_link(struct smc_link *lnk); void smc_wr_free_link_mem(struct smc_link *lnk); +void smc_wr_free_lgr_mem(struct smc_link_group *lgr); void smc_wr_remember_qp_attr(struct smc_link *lnk); void smc_wr_remove_dev(struct smc_ib_device *smcibdev); void smc_wr_add_dev(struct smc_ib_device *smcibdev); @@ -111,10 +113,16 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wrs, struct smc_wr_tx_pend_priv **wr_pend_priv); +int smc_wr_tx_get_v2_slot(struct smc_link *link, + smc_wr_tx_handler handler, + struct smc_wr_v2_buf **wr_buf, + struct smc_wr_tx_pend_priv **wr_pend_priv); int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); +int smc_wr_tx_v2_send(struct smc_link *link, + struct smc_wr_tx_pend_priv *priv, int len); int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 443f8e5b9477..60bc74b76adc 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -462,7 +462,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, b->bcast_addr.media_id = b->media->type_id; b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; b->mtu = dev->mtu; - b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); + b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr); rcu_assign_pointer(dev->tipc_ptr, b); return 0; } @@ -703,7 +703,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, - (char *)dev->dev_addr); + (const char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 57c6a1a719e2..490ad6e5f7a3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -117,7 +117,7 @@ struct tipc_media { char *msg); int (*raw2addr)(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *raw); + const char *raw); u32 priority; u32 tolerance; u32 min_win; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index c68019697cfe..cb0d185e06af 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -60,7 +60,7 @@ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw mac address format to media addr format */ static int tipc_eth_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *msg) + const char *msg) { memset(addr, 0, sizeof(*addr)); ether_addr_copy(addr->value, msg); diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 7aa9ff88458d..b9ad0434c3cd 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -67,7 +67,7 @@ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) /* Convert raw InfiniBand address format to media addr format */ static int tipc_ib_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, - char *msg) + const char *msg) { memset(addr, 0, sizeof(*addr)); memcpy(addr->value, msg, INFINIBAND_ALEN); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index fde56ff49163..d44399efeac6 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -421,6 +421,46 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EFAULT; break; } + case TLS_CIPHER_SM4_GCM: { + struct tls12_crypto_info_sm4_gcm *sm4_gcm_info = + container_of(crypto_info, + struct tls12_crypto_info_sm4_gcm, info); + + if (len != sizeof(*sm4_gcm_info)) { + rc = -EINVAL; + goto out; + } + lock_sock(sk); + memcpy(sm4_gcm_info->iv, + cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE, + TLS_CIPHER_SM4_GCM_IV_SIZE); + memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq, + TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE); + release_sock(sk); + if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info))) + rc = -EFAULT; + break; + } + case TLS_CIPHER_SM4_CCM: { + struct tls12_crypto_info_sm4_ccm *sm4_ccm_info = + container_of(crypto_info, + struct tls12_crypto_info_sm4_ccm, info); + + if (len != sizeof(*sm4_ccm_info)) { + rc = -EINVAL; + goto out; + } + lock_sock(sk); + memcpy(sm4_ccm_info->iv, + cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE, + TLS_CIPHER_SM4_CCM_IV_SIZE); + memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq, + TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE); + release_sock(sk); + if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info))) + rc = -EFAULT; + break; + } default: rc = -EINVAL; } @@ -524,6 +564,12 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, case TLS_CIPHER_CHACHA20_POLY1305: optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305); break; + case TLS_CIPHER_SM4_GCM: + optsize = sizeof(struct tls12_crypto_info_sm4_gcm); + break; + case TLS_CIPHER_SM4_CCM: + optsize = sizeof(struct tls12_crypto_info_sm4_ccm); + break; default: rc = -EINVAL; goto err_crypto_info; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 4feb95e34b64..4147bb2e7057 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -498,9 +498,15 @@ static int tls_do_encryption(struct sock *sk, int rc, iv_offset = 0; /* For CCM based ciphers, first byte of IV is a constant */ - if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { + switch (prot->cipher_type) { + case TLS_CIPHER_AES_CCM_128: rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE; iv_offset = 1; + break; + case TLS_CIPHER_SM4_CCM: + rec->iv_data[0] = TLS_SM4_CCM_IV_B0_BYTE; + iv_offset = 1; + break; } memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, @@ -1457,10 +1463,16 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aad = (u8 *)(sgout + n_sgout); iv = aad + prot->aad_size; - /* For CCM based ciphers, first byte of nonce+iv is always '2' */ - if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { - iv[0] = 2; + /* For CCM based ciphers, first byte of nonce+iv is a constant */ + switch (prot->cipher_type) { + case TLS_CIPHER_AES_CCM_128: + iv[0] = TLS_AES_CCM_IV_B0_BYTE; + iv_offset = 1; + break; + case TLS_CIPHER_SM4_CCM: + iv[0] = TLS_SM4_CCM_IV_B0_BYTE; iv_offset = 1; + break; } /* Prepare IV */ @@ -2424,6 +2436,40 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) cipher_name = "rfc7539(chacha20,poly1305)"; break; } + case TLS_CIPHER_SM4_GCM: { + struct tls12_crypto_info_sm4_gcm *sm4_gcm_info; + + sm4_gcm_info = (void *)crypto_info; + nonce_size = TLS_CIPHER_SM4_GCM_IV_SIZE; + tag_size = TLS_CIPHER_SM4_GCM_TAG_SIZE; + iv_size = TLS_CIPHER_SM4_GCM_IV_SIZE; + iv = sm4_gcm_info->iv; + rec_seq_size = TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE; + rec_seq = sm4_gcm_info->rec_seq; + keysize = TLS_CIPHER_SM4_GCM_KEY_SIZE; + key = sm4_gcm_info->key; + salt = sm4_gcm_info->salt; + salt_size = TLS_CIPHER_SM4_GCM_SALT_SIZE; + cipher_name = "gcm(sm4)"; + break; + } + case TLS_CIPHER_SM4_CCM: { + struct tls12_crypto_info_sm4_ccm *sm4_ccm_info; + + sm4_ccm_info = (void *)crypto_info; + nonce_size = TLS_CIPHER_SM4_CCM_IV_SIZE; + tag_size = TLS_CIPHER_SM4_CCM_TAG_SIZE; + iv_size = TLS_CIPHER_SM4_CCM_IV_SIZE; + iv = sm4_ccm_info->iv; + rec_seq_size = TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE; + rec_seq = sm4_ccm_info->rec_seq; + keysize = TLS_CIPHER_SM4_CCM_KEY_SIZE; + key = sm4_ccm_info->key; + salt = sm4_ccm_info->salt; + salt_size = TLS_CIPHER_SM4_CCM_SALT_SIZE; + cipher_name = "ccm(sm4)"; + break; + } default: rc = -EINVAL; goto free_priv; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index e2c0cfb334d2..7d851eb3a683 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1614,13 +1614,18 @@ static int vsock_connectible_setsockopt(struct socket *sock, vsock_update_buffer_size(vsk, transport, vsk->buffer_size); break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct __kernel_old_timeval tv; - COPY_IN(tv); + case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: + case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: { + struct __kernel_sock_timeval tv; + + err = sock_copy_user_timeval(&tv, optval, optlen, + optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); + if (err) + break; if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { vsk->connect_timeout = tv.tv_sec * HZ + - DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ)); if (vsk->connect_timeout == 0) vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; @@ -1648,68 +1653,59 @@ static int vsock_connectible_getsockopt(struct socket *sock, char __user *optval, int __user *optlen) { - int err; + struct sock *sk = sock->sk; + struct vsock_sock *vsk = vsock_sk(sk); + + union { + u64 val64; + struct old_timeval32 tm32; + struct __kernel_old_timeval tm; + struct __kernel_sock_timeval stm; + } v; + + int lv = sizeof(v.val64); int len; - struct sock *sk; - struct vsock_sock *vsk; - u64 val; if (level != AF_VSOCK) return -ENOPROTOOPT; - err = get_user(len, optlen); - if (err != 0) - return err; - -#define COPY_OUT(_v) \ - do { \ - if (len < sizeof(_v)) \ - return -EINVAL; \ - \ - len = sizeof(_v); \ - if (copy_to_user(optval, &_v, len) != 0) \ - return -EFAULT; \ - \ - } while (0) + if (get_user(len, optlen)) + return -EFAULT; - err = 0; - sk = sock->sk; - vsk = vsock_sk(sk); + memset(&v, 0, sizeof(v)); switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: - val = vsk->buffer_size; - COPY_OUT(val); + v.val64 = vsk->buffer_size; break; case SO_VM_SOCKETS_BUFFER_MAX_SIZE: - val = vsk->buffer_max_size; - COPY_OUT(val); + v.val64 = vsk->buffer_max_size; break; case SO_VM_SOCKETS_BUFFER_MIN_SIZE: - val = vsk->buffer_min_size; - COPY_OUT(val); + v.val64 = vsk->buffer_min_size; break; - case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct __kernel_old_timeval tv; - tv.tv_sec = vsk->connect_timeout / HZ; - tv.tv_usec = - (vsk->connect_timeout - - tv.tv_sec * HZ) * (1000000 / HZ); - COPY_OUT(tv); + case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW: + case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: + lv = sock_get_timeout(vsk->connect_timeout, &v, + optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD); break; - } + default: return -ENOPROTOOPT; } - err = put_user(len, optlen); - if (err != 0) + if (len < lv) + return -EINVAL; + if (len > lv) + len = lv; + if (copy_to_user(optval, &v, len)) return -EFAULT; -#undef COPY_OUT + if (put_user(len, optlen)) + return -EFAULT; return 0; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d6b500dc4208..f16074eb53c7 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -134,21 +134,6 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, return 0; } -void xp_release(struct xdp_buff_xsk *xskb) -{ - xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; -} - -static u64 xp_get_handle(struct xdp_buff_xsk *xskb) -{ - u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; - - offset += xskb->pool->headroom; - if (!xskb->pool->unaligned) - return xskb->orig_addr + offset; - return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); -} - static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 8de01aaac4a0..90c4e1e819d3 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -44,12 +44,13 @@ void xp_destroy(struct xsk_buff_pool *pool) struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem) { + bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; struct xsk_buff_pool *pool; struct xdp_buff_xsk *xskb; - u32 i; + u32 i, entries; - pool = kvzalloc(struct_size(pool, free_heads, umem->chunks), - GFP_KERNEL); + entries = unaligned ? umem->chunks : 0; + pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL); if (!pool) goto out; @@ -63,7 +64,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->free_heads_cnt = umem->chunks; pool->headroom = umem->headroom; pool->chunk_size = umem->chunk_size; - pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; + pool->chunk_shift = ffs(umem->chunk_size) - 1; + pool->unaligned = unaligned; pool->frame_len = umem->chunk_size - umem->headroom - XDP_PACKET_HEADROOM; pool->umem = umem; @@ -81,7 +83,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; - pool->free_heads[i] = xskb; + if (pool->unaligned) + pool->free_heads[i] = xskb; + else + xp_init_xskb_addr(xskb, pool, i * pool->chunk_size); } return pool; @@ -406,6 +411,12 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(dma_map); + else + for (i = 0; i < pool->heads_cnt; i++) { + struct xdp_buff_xsk *xskb = &pool->heads[i]; + + xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); + } err = xp_init_dma_info(pool, dma_map); if (err) { @@ -448,12 +459,9 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) if (pool->free_heads_cnt == 0) return NULL; - xskb = pool->free_heads[--pool->free_heads_cnt]; - for (;;) { if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) { pool->fq->queue_empty_descs++; - xp_release(xskb); return NULL; } @@ -466,17 +474,17 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) } break; } - xskq_cons_release(pool->fq); - xskb->orig_addr = addr; - xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; - if (pool->dma_pages_cnt) { - xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] & - ~XSK_NEXT_PG_CONTIG_MASK) + - (addr & ~PAGE_MASK); - xskb->dma = xskb->frame_dma + pool->headroom + - XDP_PACKET_HEADROOM; + if (pool->unaligned) { + xskb = pool->free_heads[--pool->free_heads_cnt]; + xp_init_xskb_addr(xskb, pool, addr); + if (pool->dma_pages_cnt) + xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); + } else { + xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; } + + xskq_cons_release(pool->fq); return xskb; } @@ -507,6 +515,96 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) } EXPORT_SYMBOL(xp_alloc); +static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + u32 i, cached_cons, nb_entries; + + if (max > pool->free_heads_cnt) + max = pool->free_heads_cnt; + max = xskq_cons_nb_entries(pool->fq, max); + + cached_cons = pool->fq->cached_cons; + nb_entries = max; + i = max; + while (i--) { + struct xdp_buff_xsk *xskb; + u64 addr; + bool ok; + + __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr); + + ok = pool->unaligned ? xp_check_unaligned(pool, &addr) : + xp_check_aligned(pool, &addr); + if (unlikely(!ok)) { + pool->fq->invalid_descs++; + nb_entries--; + continue; + } + + if (pool->unaligned) { + xskb = pool->free_heads[--pool->free_heads_cnt]; + xp_init_xskb_addr(xskb, pool, addr); + if (pool->dma_pages_cnt) + xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); + } else { + xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; + } + + *xdp = &xskb->xdp; + xdp++; + } + + xskq_cons_release_n(pool->fq, max); + return nb_entries; +} + +static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries) +{ + struct xdp_buff_xsk *xskb; + u32 i; + + nb_entries = min_t(u32, nb_entries, pool->free_list_cnt); + + i = nb_entries; + while (i--) { + xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node); + list_del(&xskb->free_list_node); + + *xdp = &xskb->xdp; + xdp++; + } + pool->free_list_cnt -= nb_entries; + + return nb_entries; +} + +u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + u32 nb_entries1 = 0, nb_entries2; + + if (unlikely(pool->dma_need_sync)) { + /* Slow path */ + *xdp = xp_alloc(pool); + return !!*xdp; + } + + if (unlikely(pool->free_list_cnt)) { + nb_entries1 = xp_alloc_reused(pool, xdp, max); + if (nb_entries1 == max) + return nb_entries1; + + max -= nb_entries1; + xdp += nb_entries1; + } + + nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max); + if (!nb_entries2) + pool->fq->queue_empty_descs++; + + return nb_entries1 + nb_entries2; +} +EXPORT_SYMBOL(xp_alloc_batch); + bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count) { if (pool->free_list_cnt >= count) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 9ae13cccfb28..e9aa2c236356 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -111,14 +111,18 @@ struct xsk_queue { /* Functions that read and validate content from consumer rings. */ -static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) +static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; + u32 idx = cached_cons & q->ring_mask; - if (q->cached_cons != q->cached_prod) { - u32 idx = q->cached_cons & q->ring_mask; + *addr = ring->desc[idx]; +} - *addr = ring->desc[idx]; +static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) +{ + if (q->cached_cons != q->cached_prod) { + __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr); return true; } |