209 files changed, 7524 insertions(+), 1681 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt index 999aceadb985..beca6466d59a 100644 --- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt +++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt @@ -31,6 +31,7 @@ Optional properties for compatible string qcom,wcn399x-bt: - max-speed: see Documentation/devicetree/bindings/serial/slave-device.txt - firmware-name: specify the name of nvm firmware to load + - clocks: clock provided to the controller Examples: @@ -57,5 +58,6 @@ serial@898000 { vddch0-supply = <&vreg_l25a_3p3>; max-speed = <3200000>; firmware-name = "crnv21.bin"; + clocks = <&rpmhcc RPMH_RF_CLK2>; }; }; diff --git a/Documentation/networking/6lowpan.txt b/Documentation/networking/6lowpan.rst index 2e5a939d7e6f..e70a6520cc33 100644 --- a/Documentation/networking/6lowpan.txt +++ b/Documentation/networking/6lowpan.rst @@ -1,37 +1,40 @@ +.. SPDX-License-Identifier: GPL-2.0 -Netdev private dataroom for 6lowpan interfaces: +============================================== +Netdev private dataroom for 6lowpan interfaces +============================================== All 6lowpan able net devices, means all interfaces with ARPHRD_6LOWPAN, must have "struct lowpan_priv" placed at beginning of netdev_priv. -The priv_size of each interface should be calculate by: +The priv_size of each interface should be calculate by:: dev->priv_size = LOWPAN_PRIV_SIZE(LL_6LOWPAN_PRIV_DATA); Where LL_PRIV_6LOWPAN_DATA is sizeof linklayer 6lowpan private data struct. -To access the LL_PRIV_6LOWPAN_DATA structure you can cast: +To access the LL_PRIV_6LOWPAN_DATA structure you can cast:: lowpan_priv(dev)-priv; to your LL_6LOWPAN_PRIV_DATA structure. -Before registering the lowpan netdev interface you must run: +Before registering the lowpan netdev interface you must run:: lowpan_netdev_setup(dev, LOWPAN_LLTYPE_FOOBAR); wheres LOWPAN_LLTYPE_FOOBAR is a define for your 6LoWPAN linklayer type of enum lowpan_lltypes. -Example to evaluate the private usually you can do: +Example to evaluate the private usually you can do:: -static inline struct lowpan_priv_foobar * -lowpan_foobar_priv(struct net_device *dev) -{ + static inline struct lowpan_priv_foobar * + lowpan_foobar_priv(struct net_device *dev) + { return (struct lowpan_priv_foobar *)lowpan_priv(dev)->priv; -} + } -switch (dev->type) { -case ARPHRD_6LOWPAN: + switch (dev->type) { + case ARPHRD_6LOWPAN: lowpan_priv = lowpan_priv(dev); /* do great stuff which is ARPHRD_6LOWPAN related */ switch (lowpan_priv->lltype) { @@ -42,8 +45,8 @@ case ARPHRD_6LOWPAN: ... } break; -... -} + ... + } In case of generic 6lowpan branch ("net/6lowpan") you can remove the check on ARPHRD_6LOWPAN, because you can be sure that these function are called diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.rst b/Documentation/networking/device_drivers/stmicro/stmmac.rst index c34bab3d2df0..5d46e5036129 100644 --- a/Documentation/networking/device_drivers/stmicro/stmmac.rst +++ b/Documentation/networking/device_drivers/stmicro/stmmac.rst @@ -32,7 +32,8 @@ is also supported. DesignWare(R) Cores Ethernet MAC 10/100/1000 Universal version 3.70a (and older) and DesignWare(R) Cores Ethernet Quality-of-Service version 4.0 (and upper) have been used for developing this driver as well as -DesignWare(R) Cores XGMAC - 10G Ethernet MAC. +DesignWare(R) Cores XGMAC - 10G Ethernet MAC and DesignWare(R) Cores +Enterprise MAC - 100G Ethernet MAC. 
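Before moving on: the 6lowpan.rst rewrite above documents the private-dataroom contract for ARPHRD_6LOWPAN interfaces. As a minimal sketch of a link layer honouring that contract, assuming the helpers from include/net/6lowpan.h and using the document's own "foobar" placeholder names (nothing here is real in-tree code):

    struct lowpan_foobar_priv {
        u16 short_addr;    /* illustrative link-layer private state */
    };

    static inline struct lowpan_foobar_priv *
    lowpan_foobar_priv(struct net_device *dev)
    {
        return (struct lowpan_foobar_priv *)lowpan_priv(dev)->priv;
    }

    static void foobar_setup(struct net_device *dev)
    {
        /* Must run before register_netdev(): marks the device as
         * ARPHRD_6LOWPAN and records the link-layer type. */
        lowpan_netdev_setup(dev, LOWPAN_LLTYPE_FOOBAR);
    }

    static struct net_device *foobar_alloc_lowpan_dev(void)
    {
        /* Reserve netdev_priv() room for struct lowpan_priv plus
         * the link-layer data, as the contract above requires. */
        return alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_foobar_priv)),
                            "6lowpan%d", NET_NAME_UNKNOWN, foobar_setup);
    }
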
This driver supports both the platform bus and PCI. @@ -48,6 +49,8 @@ Cores Ethernet Controllers and corresponding minimum and maximum versions: +-------------------------------+--------------+--------------+--------------+ | XGMAC - 10G Ethernet MAC | 2.10a | N/A | XGMAC2+ | +-------------------------------+--------------+--------------+--------------+ +| XLGMAC - 100G Ethernet MAC | 2.00a | N/A | XLGMAC2+ | ++-------------------------------+--------------+--------------+--------------+ For questions related to hardware requirements, refer to the documentation supplied with your Ethernet adapter. All hardware requirements listed apply @@ -57,7 +60,7 @@ Feature List ============ The following features are available in this driver: - - GMII/MII/RGMII/SGMII/RMII/XGMII Interface + - GMII/MII/RGMII/SGMII/RMII/XGMII/XLGMII Interface - Half-Duplex / Full-Duplex Operation - Energy Efficient Ethernet (EEE) - IEEE 802.3x PAUSE Packets (Flow Control) diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 3a83cfb66704..50133d9761c9 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -34,6 +34,7 @@ Contents: tls tls-offload nfc + 6lowpan .. only:: subproject and html diff --git a/MAINTAINERS b/MAINTAINERS index 5dbee41045bc..97dce264bc7c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -176,7 +176,7 @@ L: linux-wpan@vger.kernel.org S: Maintained F: net/6lowpan/ F: include/net/6lowpan.h -F: Documentation/networking/6lowpan.txt +F: Documentation/networking/6lowpan.rst 6PACK NETWORK DRIVER FOR AX.25 M: Andreas Koensgen <ajk@comnets.uni-bremen.de> diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index f7aa2dc1ff85..4e73a531b377 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -211,12 +211,12 @@ config BT_HCIUART_RTL depends on BT_HCIUART depends on BT_HCIUART_SERDEV depends on GPIOLIB - depends on ACPI + depends on (ACPI || SERIAL_DEV_CTRL_TTYPORT) select BT_HCIUART_3WIRE select BT_RTL help The Realtek protocol support enables Bluetooth HCI over 3-Wire - serial port internface for Realtek Bluetooth controllers. + serial port interface for Realtek Bluetooth controllers. Say Y here to compile support for Realtek protocol. 
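The BT_HCIUART_RTL dependency change above (ACPI || SERIAL_DEV_CTRL_TTYPORT) is what opens this driver to devicetree-enumerated serdev platforms; the hci_h5.c hunks later in this series add the matching OF path. A condensed sketch of the probe-time pattern used there, with error paths and the remaining probe steps trimmed:

    static int h5_serdev_probe(struct serdev_device *serdev)
    {
        struct device *dev = &serdev->dev;
        struct h5 *h5;

        h5 = devm_kzalloc(dev, sizeof(*h5), GFP_KERNEL);
        if (!h5)
            return -ENOMEM;

        if (has_acpi_companion(dev)) {
            const struct acpi_device_id *match;

            /* ACPI enumeration: vendor data lives in driver_data
             * of the matching acpi_device_id entry. */
            match = acpi_match_device(dev->driver->acpi_match_table, dev);
            if (!match)
                return -ENODEV;
            h5->vnd = (const struct h5_vnd *)match->driver_data;
        } else {
            /* DT enumeration: .data of the of_device_id entry,
             * e.g. &rtl_vnd for "realtek,rtl8822cs-bt". */
            h5->vnd = of_device_get_match_data(dev);
            if (!h5->vnd)
                return -ENODEV;
        }

        /* gpio lookup and hci_uart registration elided */
        return 0;
    }
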
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 0e5954cac98e..5a321b4076aa 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -133,8 +133,8 @@ static int bfusb_send_bulk(struct bfusb_data *data, struct sk_buff *skb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - BT_ERR("%s bulk tx submit failed urb %p err %d", - data->hdev->name, urb, err); + bt_dev_err(data->hdev, "bulk tx submit failed urb %p err %d", + urb, err); skb_unlink(skb, &data->pending_q); usb_free_urb(urb); } else @@ -232,8 +232,8 @@ static int bfusb_rx_submit(struct bfusb_data *data, struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - BT_ERR("%s bulk rx submit failed urb %p err %d", - data->hdev->name, urb, err); + bt_dev_err(data->hdev, "bulk rx submit failed urb %p err %d", + urb, err); skb_unlink(skb, &data->pending_q); kfree_skb(skb); usb_free_urb(urb); @@ -247,7 +247,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch BT_DBG("bfusb %p hdr 0x%02x data %p len %d", data, hdr, buf, len); if (hdr & 0x10) { - BT_ERR("%s error in block", data->hdev->name); + bt_dev_err(data->hdev, "error in block"); kfree_skb(data->reassembly); data->reassembly = NULL; return -EIO; @@ -259,13 +259,13 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch int pkt_len = 0; if (data->reassembly) { - BT_ERR("%s unexpected start block", data->hdev->name); + bt_dev_err(data->hdev, "unexpected start block"); kfree_skb(data->reassembly); data->reassembly = NULL; } if (len < 1) { - BT_ERR("%s no packet type found", data->hdev->name); + bt_dev_err(data->hdev, "no packet type found"); return -EPROTO; } @@ -277,7 +277,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch struct hci_event_hdr *hdr = (struct hci_event_hdr *) buf; pkt_len = HCI_EVENT_HDR_SIZE + hdr->plen; } else { - BT_ERR("%s event block is too short", data->hdev->name); + bt_dev_err(data->hdev, "event block is too short"); return -EILSEQ; } break; @@ -287,7 +287,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch struct hci_acl_hdr *hdr = (struct hci_acl_hdr *) buf; pkt_len = HCI_ACL_HDR_SIZE + __le16_to_cpu(hdr->dlen); } else { - BT_ERR("%s data block is too short", data->hdev->name); + bt_dev_err(data->hdev, "data block is too short"); return -EILSEQ; } break; @@ -297,7 +297,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch struct hci_sco_hdr *hdr = (struct hci_sco_hdr *) buf; pkt_len = HCI_SCO_HDR_SIZE + hdr->dlen; } else { - BT_ERR("%s audio block is too short", data->hdev->name); + bt_dev_err(data->hdev, "audio block is too short"); return -EILSEQ; } break; @@ -305,7 +305,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch skb = bt_skb_alloc(pkt_len, GFP_ATOMIC); if (!skb) { - BT_ERR("%s no memory for the packet", data->hdev->name); + bt_dev_err(data->hdev, "no memory for the packet"); return -ENOMEM; } @@ -314,7 +314,7 @@ static inline int bfusb_recv_block(struct bfusb_data *data, int hdr, unsigned ch data->reassembly = skb; } else { if (!data->reassembly) { - BT_ERR("%s unexpected continuation block", data->hdev->name); + bt_dev_err(data->hdev, "unexpected continuation block"); return -EIO; } } @@ -366,8 +366,7 @@ static void bfusb_rx_complete(struct urb *urb) } if (count < len) { - BT_ERR("%s block extends over URB buffer ranges", - data->hdev->name); + bt_dev_err(data->hdev, "block extends over URB buffer ranges"); } 
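The bfusb.c hunks above (and the hci_h5.c ones later in the series) follow one mechanical rule: bt_dev_err() takes the hci_dev and prefixes the device name itself, so the hand-rolled "%s" plus hdev->name pair drops out. Side by side, taken from the first hunk:

    /* Old style: the device name must be spliced in by hand. */
    BT_ERR("%s bulk tx submit failed urb %p err %d",
           data->hdev->name, urb, err);

    /* New style: bt_dev_err() derives the prefix from the hci_dev,
     * one argument shorter and uniform across drivers. */
    bt_dev_err(data->hdev, "bulk tx submit failed urb %p err %d",
               urb, err);
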
if ((hdr & 0xe1) == 0xc1) @@ -391,8 +390,8 @@ resubmit: err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - BT_ERR("%s bulk resubmit failed urb %p err %d", - data->hdev->name, urb, err); + bt_dev_err(data->hdev, "bulk resubmit failed urb %p err %d", + urb, err); } unlock: @@ -477,7 +476,7 @@ static int bfusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb) /* Max HCI frame size seems to be 1511 + 1 */ nskb = bt_skb_alloc(count + 32, GFP_KERNEL); if (!nskb) { - BT_ERR("Can't allocate memory for new packet"); + bt_dev_err(hdev, "Can't allocate memory for new packet"); return -ENOMEM; } diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 62e781a18bf0..6a0e2c5a8beb 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -376,13 +376,13 @@ struct ibt_cp_reg_access { __le32 addr; __u8 mode; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; struct ibt_rp_reg_access { __u8 status; __le32 addr; - __u8 data[0]; + __u8 data[]; } __packed; static int regmap_ibt_read(void *context, const void *addr, size_t reg_size, diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index ec69e5dd7bd3..a16845c0751d 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -139,7 +139,7 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); static void qca_tlv_check_data(struct qca_fw_config *config, - const struct firmware *fw) + const struct firmware *fw, enum qca_btsoc_type soc_type) { const u8 *data; u32 type_len; @@ -148,6 +148,7 @@ static void qca_tlv_check_data(struct qca_fw_config *config, struct tlv_type_hdr *tlv; struct tlv_type_patch *tlv_patch; struct tlv_type_nvm *tlv_nvm; + uint8_t nvm_baud_rate = config->user_baud_rate; tlv = (struct tlv_type_hdr *)fw->data; @@ -216,7 +217,10 @@ static void qca_tlv_check_data(struct qca_fw_config *config, tlv_nvm->data[0] |= 0x80; /* UART Baud Rate */ - tlv_nvm->data[2] = config->user_baud_rate; + if (soc_type == QCA_WCN3991) + tlv_nvm->data[1] = nvm_baud_rate; + else + tlv_nvm->data[2] = nvm_baud_rate; break; @@ -354,7 +358,7 @@ static int qca_download_firmware(struct hci_dev *hdev, return ret; } - qca_tlv_check_data(config, fw); + qca_tlv_check_data(config, fw, soc_type); segment = fw->data; remain = fw->size; diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index f5795b1a3779..e16a4d650597 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -79,7 +79,7 @@ struct qca_fw_config { struct edl_event_hdr { __u8 cresp; __u8 rtype; - __u8 data[0]; + __u8 data[]; } __packed; struct qca_btsoc_version { @@ -112,12 +112,12 @@ struct tlv_type_nvm { __le16 tag_len; __le32 reserve1; __le32 reserve2; - __u8 data[0]; + __u8 data[]; } __packed; struct tlv_type_hdr { __le32 type_len; - __u8 data[0]; + __u8 data[]; } __packed; enum qca_btsoc_type { diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 577cfa3329db..67f4bc21e7c5 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -136,6 +136,18 @@ static const struct id_table ic_id_table[] = { .fw_name = "rtl_bt/rtl8761a_fw.bin", .cfg_name = "rtl_bt/rtl8761a_config" }, + /* 8822C with UART interface */ + { .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV | + IC_MATCH_FL_HCIBUS, + .lmp_subver = RTL_ROM_LMP_8822B, + .hci_rev = 0x000c, + .hci_ver = 0x0a, + .hci_bus = HCI_UART, + .config_needed = true, + .has_rom_version = true, + .fw_name = "rtl_bt/rtl8822cs_fw.bin", + .cfg_name = "rtl_bt/rtl8822cs_config" }, + /* 8822C with 
USB interface */ { IC_INFO(RTL_ROM_LMP_8822B, 0xc), .config_needed = false, diff --git a/drivers/bluetooth/btrtl.h b/drivers/bluetooth/btrtl.h index 10ad40c3e42c..2a582682136d 100644 --- a/drivers/bluetooth/btrtl.h +++ b/drivers/bluetooth/btrtl.h @@ -38,13 +38,13 @@ struct rtl_epatch_header { struct rtl_vendor_config_entry { __le16 offset; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; struct rtl_vendor_config { __le32 signature; __le16 total_len; - struct rtl_vendor_config_entry entry[0]; + struct rtl_vendor_config_entry entry[]; } __packed; #if IS_ENABLED(CONFIG_BT_RTL) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index f5924f3e8b8d..3bdec42c9612 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -57,6 +57,7 @@ static struct usb_driver btusb_driver; #define BTUSB_IFNUM_2 0x80000 #define BTUSB_CW6622 0x100000 #define BTUSB_MEDIATEK 0x200000 +#define BTUSB_WIDEBAND_SPEECH 0x400000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -333,15 +334,21 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL }, /* Intel Bluetooth devices */ - { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW }, - { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW }, - { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW }, + { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, - { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW }, - { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL }, - { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW }, + { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW | + BTUSB_WIDEBAND_SPEECH }, /* Other Intel Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01), @@ -387,6 +394,7 @@ static const struct usb_device_id blacklist_table[] = { /* Additional Realtek 8822CE Bluetooth devices */ { USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK }, + { USB_DEVICE(0x13d3, 0x3548), .driver_info = BTUSB_REALTEK }, /* Silicon Wave based devices */ { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE }, @@ -1930,7 +1938,14 @@ static int btusb_setup_intel(struct hci_dev *hdev) if (err) return err; - bt_dev_info(hdev, "Intel firmware patch completed and activated"); + /* Need build number for downloaded fw patches in + * every power-on boot + */ + err = btintel_read_version(hdev, &ver); + if (err) + return err; + bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated", + ver.fw_patch_num); goto complete; @@ -3859,6 +3874,9 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_BROKEN_ISOC) data->isoc = NULL; + if (id->driver_info & BTUSB_WIDEBAND_SPEECH) + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); 
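A second mechanical conversion runs through btintel.c, btqca.h, btrtl.h and the hci_* drivers in this series: the pre-C99 trailing-array idiom __u8 data[0]; becomes a proper flexible array member __u8 data[];. The layout is unchanged; the gain is that the compiler and fortify checks see the real intent. A hedged sketch of how such a header is typically allocated, using the struct_size() helper from linux/overflow.h (payload_len is an illustrative variable, not taken from this series):

    #include <linux/overflow.h>

    struct edl_event_hdr {
        __u8 cresp;
        __u8 rtype;
        __u8 data[];    /* flexible array member, formerly data[0] */
    } __packed;

    /* struct_size() evaluates to sizeof(*ev) + payload_len bytes,
     * saturating rather than wrapping on overflow. */
    struct edl_event_hdr *ev = kzalloc(struct_size(ev, data, payload_len),
                                       GFP_KERNEL);
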
diff --git a/drivers/bluetooth/hci_ag6xx.c b/drivers/bluetooth/hci_ag6xx.c index 8bafa650b5b0..1f55df93e4ce 100644 --- a/drivers/bluetooth/hci_ag6xx.c +++ b/drivers/bluetooth/hci_ag6xx.c @@ -27,7 +27,7 @@ struct ag6xx_data { struct pbn_entry { __le32 addr; __le32 plen; - __u8 data[0]; + __u8 data[]; } __packed; static int ag6xx_open(struct hci_uart *hu) diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 6dc1fbeb564b..4b3b14a34794 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -71,8 +71,6 @@ static int h4_close(struct hci_uart *hu) { struct h4_struct *h4 = hu->priv; - hu->priv = NULL; - BT_DBG("hu %p", hu); skb_queue_purge(&h4->txq); @@ -85,7 +83,7 @@ static int h4_close(struct hci_uart *hu) return 0; } -/* Enqueue frame for transmittion (padding, crc, etc) */ +/* Enqueue frame for transmission (padding, crc, etc) */ static int h4_enqueue(struct hci_uart *hu, struct sk_buff *skb) { struct h4_struct *h4 = hu->priv; diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 0b14547482a7..106c110efe56 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -11,6 +11,7 @@ #include <linux/gpio/consumer.h> #include <linux/kernel.h> #include <linux/mod_devicetable.h> +#include <linux/of_device.h> #include <linux/serdev.h> #include <linux/skbuff.h> @@ -177,7 +178,7 @@ static void h5_peer_reset(struct hci_uart *hu) { struct h5 *h5 = hu->priv; - BT_ERR("Peer device has reset"); + bt_dev_err(hu->hdev, "Peer device has reset"); h5->state = H5_UNINITIALIZED; @@ -437,21 +438,21 @@ static int h5_rx_3wire_hdr(struct hci_uart *hu, unsigned char c) H5_HDR_LEN(hdr)); if (((hdr[0] + hdr[1] + hdr[2] + hdr[3]) & 0xff) != 0xff) { - BT_ERR("Invalid header checksum"); + bt_dev_err(hu->hdev, "Invalid header checksum"); h5_reset_rx(h5); return 0; } if (H5_HDR_RELIABLE(hdr) && H5_HDR_SEQ(hdr) != h5->tx_ack) { - BT_ERR("Out-of-order packet arrived (%u != %u)", - H5_HDR_SEQ(hdr), h5->tx_ack); + bt_dev_err(hu->hdev, "Out-of-order packet arrived (%u != %u)", + H5_HDR_SEQ(hdr), h5->tx_ack); h5_reset_rx(h5); return 0; } if (h5->state != H5_ACTIVE && H5_HDR_PKT_TYPE(hdr) != HCI_3WIRE_LINK_PKT) { - BT_ERR("Non-link packet received in non-active state"); + bt_dev_err(hu->hdev, "Non-link packet received in non-active state"); h5_reset_rx(h5); return 0; } @@ -474,7 +475,7 @@ static int h5_rx_pkt_start(struct hci_uart *hu, unsigned char c) h5->rx_skb = bt_skb_alloc(H5_MAX_LEN, GFP_ATOMIC); if (!h5->rx_skb) { - BT_ERR("Can't allocate mem for new packet"); + bt_dev_err(hu->hdev, "Can't allocate mem for new packet"); h5_reset_rx(h5); return -ENOMEM; } @@ -550,7 +551,7 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count) if (h5->rx_pending > 0) { if (*ptr == SLIP_DELIMITER) { - BT_ERR("Too short H5 packet"); + bt_dev_err(hu->hdev, "Too short H5 packet"); h5_reset_rx(h5); continue; } @@ -577,13 +578,13 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb) struct h5 *h5 = hu->priv; if (skb->len > 0xfff) { - BT_ERR("Packet too long (%u bytes)", skb->len); + bt_dev_err(hu->hdev, "Packet too long (%u bytes)", skb->len); kfree_skb(skb); return 0; } if (h5->state != H5_ACTIVE) { - BT_ERR("Ignoring HCI data in non-active state"); + bt_dev_err(hu->hdev, "Ignoring HCI data in non-active state"); kfree_skb(skb); return 0; } @@ -600,7 +601,7 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb) break; default: - BT_ERR("Unknown packet type %u", hci_skb_pkt_type(skb)); + bt_dev_err(hu->hdev, "Unknown packet type 
%u", hci_skb_pkt_type(skb)); kfree_skb(skb); break; } @@ -656,7 +657,7 @@ static struct sk_buff *h5_prepare_pkt(struct hci_uart *hu, u8 pkt_type, int i; if (!valid_packet_type(pkt_type)) { - BT_ERR("Unknown packet type %u", pkt_type); + bt_dev_err(hu->hdev, "Unknown packet type %u", pkt_type); return NULL; } @@ -733,7 +734,7 @@ static struct sk_buff *h5_dequeue(struct hci_uart *hu) } skb_queue_head(&h5->unrel, skb); - BT_ERR("Could not dequeue pkt because alloc_skb failed"); + bt_dev_err(hu->hdev, "Could not dequeue pkt because alloc_skb failed"); } spin_lock_irqsave_nested(&h5->unack.lock, flags, SINGLE_DEPTH_NESTING); @@ -753,7 +754,7 @@ static struct sk_buff *h5_dequeue(struct hci_uart *hu) } skb_queue_head(&h5->rel, skb); - BT_ERR("Could not dequeue pkt because alloc_skb failed"); + bt_dev_err(hu->hdev, "Could not dequeue pkt because alloc_skb failed"); } unlock: @@ -785,7 +786,6 @@ static const struct hci_uart_proto h5p = { static int h5_serdev_probe(struct serdev_device *serdev) { - const struct acpi_device_id *match; struct device *dev = &serdev->dev; struct h5 *h5; @@ -800,6 +800,8 @@ static int h5_serdev_probe(struct serdev_device *serdev) serdev_device_set_drvdata(serdev, h5); if (has_acpi_companion(dev)) { + const struct acpi_device_id *match; + match = acpi_match_device(dev->driver->acpi_match_table, dev); if (!match) return -ENODEV; @@ -810,8 +812,17 @@ static int h5_serdev_probe(struct serdev_device *serdev) if (h5->vnd->acpi_gpio_map) devm_acpi_dev_add_driver_gpios(dev, h5->vnd->acpi_gpio_map); + } else { + const void *data; + + data = of_device_get_match_data(dev); + if (!data) + return -ENODEV; + + h5->vnd = (const struct h5_vnd *)data; } + h5->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(h5->enable_gpio)) return PTR_ERR(h5->enable_gpio); @@ -1003,6 +1014,15 @@ static const struct dev_pm_ops h5_serdev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(h5_serdev_suspend, h5_serdev_resume) }; +static const struct of_device_id rtl_bluetooth_of_match[] = { +#ifdef CONFIG_BT_HCIUART_RTL + { .compatible = "realtek,rtl8822cs-bt", + .data = (const void *)&rtl_vnd }, +#endif + { }, +}; +MODULE_DEVICE_TABLE(of, rtl_bluetooth_of_match); + static struct serdev_device_driver h5_serdev_driver = { .probe = h5_serdev_probe, .remove = h5_serdev_remove, @@ -1010,6 +1030,7 @@ static struct serdev_device_driver h5_serdev_driver = { .name = "hci_uart_h5", .acpi_match_table = ACPI_PTR(h5_acpi_match), .pm = &h5_serdev_pm_ops, + .of_match_table = rtl_bluetooth_of_match, }, }; diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 31f25153087d..f1299da6eed8 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -49,7 +49,7 @@ struct hci_lpm_pkt { __u8 opcode; __u8 dlen; - __u8 data[0]; + __u8 data[]; } __packed; struct intel_device { diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index d6e0c99ee5eb..439392b1c043 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -29,6 +29,7 @@ #include <linux/platform_device.h> #include <linux/regulator/consumer.h> #include <linux/serdev.h> +#include <linux/mutex.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -69,7 +70,8 @@ enum qca_flags { QCA_IBS_ENABLED, QCA_DROP_VENDOR_EVENT, QCA_SUSPENDING, - QCA_MEMDUMP_COLLECTION + QCA_MEMDUMP_COLLECTION, + QCA_HW_ERROR_EVENT }; @@ -138,18 +140,19 @@ struct qca_data { u32 tx_idle_delay; struct timer_list wake_retrans_timer; u32 wake_retrans; - struct timer_list memdump_timer; 
struct workqueue_struct *workqueue; struct work_struct ws_awake_rx; struct work_struct ws_awake_device; struct work_struct ws_rx_vote_off; struct work_struct ws_tx_vote_off; struct work_struct ctrl_memdump_evt; + struct delayed_work ctrl_memdump_timeout; struct qca_memdump_data *qca_memdump; unsigned long flags; struct completion drop_ev_comp; wait_queue_head_t suspend_wait_q; enum qca_memdump_states memdump_state; + struct mutex hci_memdump_lock; /* For debugging purpose */ u64 ibs_sent_wacks; @@ -522,23 +525,28 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t) hci_uart_tx_wakeup(hu); } -static void hci_memdump_timeout(struct timer_list *t) + +static void qca_controller_memdump_timeout(struct work_struct *work) { - struct qca_data *qca = from_timer(qca, t, tx_idle_timer); + struct qca_data *qca = container_of(work, struct qca_data, + ctrl_memdump_timeout.work); struct hci_uart *hu = qca->hu; - struct qca_memdump_data *qca_memdump = qca->qca_memdump; - char *memdump_buf = qca_memdump->memdump_buf_tail; - - bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); - /* Inject hw error event to reset the device and driver. */ - hci_reset_dev(hu->hdev); - vfree(memdump_buf); - kfree(qca_memdump); - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; - del_timer(&qca->memdump_timer); - cancel_work_sync(&qca->ctrl_memdump_evt); + + mutex_lock(&qca->hci_memdump_lock); + if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) { + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; + if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) { + /* Inject hw error event to reset the device + * and driver. + */ + hci_reset_dev(hu->hdev); + } + } + + mutex_unlock(&qca->hci_memdump_lock); } + /* Initialize protocol */ static int qca_open(struct hci_uart *hu) { @@ -558,6 +566,7 @@ static int qca_open(struct hci_uart *hu) skb_queue_head_init(&qca->tx_wait_q); skb_queue_head_init(&qca->rx_memdump_q); spin_lock_init(&qca->hci_ibs_lock); + mutex_init(&qca->hci_memdump_lock); qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); if (!qca->workqueue) { BT_ERR("QCA Workqueue not initialized properly"); @@ -570,6 +579,8 @@ static int qca_open(struct hci_uart *hu) INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off); INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off); INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump); + INIT_DELAYED_WORK(&qca->ctrl_memdump_timeout, + qca_controller_memdump_timeout); init_waitqueue_head(&qca->suspend_wait_q); qca->hu = hu; @@ -596,7 +607,6 @@ static int qca_open(struct hci_uart *hu) timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0); qca->tx_idle_delay = IBS_HOST_TX_IDLE_TIMEOUT_MS; - timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0); BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u", qca->tx_idle_delay, qca->wake_retrans); @@ -677,7 +687,6 @@ static int qca_close(struct hci_uart *hu) skb_queue_purge(&qca->rx_memdump_q); del_timer(&qca->tx_idle_timer); del_timer(&qca->wake_retrans_timer); - del_timer(&qca->memdump_timer); destroy_workqueue(qca->workqueue); qca->hu = NULL; @@ -963,11 +972,20 @@ static void qca_controller_memdump(struct work_struct *work) while ((skb = skb_dequeue(&qca->rx_memdump_q))) { + mutex_lock(&qca->hci_memdump_lock); + /* Skip processing the received packets if timeout detected. 
*/ + if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) { + mutex_unlock(&qca->hci_memdump_lock); + return; + } + if (!qca_memdump) { qca_memdump = kzalloc(sizeof(struct qca_memdump_data), GFP_ATOMIC); - if (!qca_memdump) + if (!qca_memdump) { + mutex_unlock(&qca->hci_memdump_lock); return; + } qca->qca_memdump = qca_memdump; } @@ -992,13 +1010,15 @@ static void qca_controller_memdump(struct work_struct *work) if (!(dump_size)) { bt_dev_err(hu->hdev, "Rx invalid memdump size"); kfree_skb(skb); + mutex_unlock(&qca->hci_memdump_lock); return; } bt_dev_info(hu->hdev, "QCA collecting dump of size:%u", dump_size); - mod_timer(&qca->memdump_timer, (jiffies + - msecs_to_jiffies(MEMDUMP_TIMEOUT_MS))); + queue_delayed_work(qca->workqueue, + &qca->ctrl_memdump_timeout, + msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)); skb_pull(skb, sizeof(dump_size)); memdump_buf = vmalloc(dump_size); @@ -1016,6 +1036,7 @@ static void qca_controller_memdump(struct work_struct *work) kfree(qca_memdump); kfree_skb(skb); qca->qca_memdump = NULL; + mutex_unlock(&qca->hci_memdump_lock); return; } @@ -1046,16 +1067,20 @@ static void qca_controller_memdump(struct work_struct *work) memdump_buf = qca_memdump->memdump_buf_head; dev_coredumpv(&hu->serdev->dev, memdump_buf, qca_memdump->received_dump, GFP_KERNEL); - del_timer(&qca->memdump_timer); + cancel_delayed_work(&qca->ctrl_memdump_timeout); kfree(qca->qca_memdump); qca->qca_memdump = NULL; qca->memdump_state = QCA_MEMDUMP_COLLECTED; + clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); } + + mutex_unlock(&qca->hci_memdump_lock); } } -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb) +static int qca_controller_memdump_event(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; @@ -1406,30 +1431,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; - struct qca_memdump_data *qca_memdump = qca->qca_memdump; - char *memdump_buf = NULL; wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); - if (qca->memdump_state == QCA_MEMDUMP_IDLE) { - bt_dev_err(hu->hdev, "Clearing the buffers due to timeout"); - if (qca_memdump) - memdump_buf = qca_memdump->memdump_buf_tail; - vfree(memdump_buf); - kfree(qca_memdump); - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; - del_timer(&qca->memdump_timer); - cancel_work_sync(&qca->ctrl_memdump_evt); - } } static void qca_hw_error(struct hci_dev *hdev, u8 code) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; + struct qca_memdump_data *qca_memdump = qca->qca_memdump; + char *memdump_buf = NULL; + set_bit(QCA_HW_ERROR_EVENT, &qca->flags); bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state); if (qca->memdump_state == QCA_MEMDUMP_IDLE) { @@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code) bt_dev_info(hdev, "waiting for dump to complete"); qca_wait_for_dump_collection(hdev); } + + if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) { + bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); + mutex_lock(&qca->hci_memdump_lock); + if (qca_memdump) + memdump_buf = qca_memdump->memdump_buf_head; + vfree(memdump_buf); + kfree(qca_memdump); + qca->qca_memdump = NULL; + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; + cancel_delayed_work(&qca->ctrl_memdump_timeout); + skb_queue_purge(&qca->rx_memdump_q); + 
mutex_unlock(&qca->hci_memdump_lock); + cancel_work_sync(&qca->ctrl_memdump_evt); + } + + clear_bit(QCA_HW_ERROR_EVENT, &qca->flags); } static void qca_cmd_timeout(struct hci_dev *hdev) @@ -1529,9 +1562,11 @@ static int qca_power_on(struct hci_dev *hdev) ret = qca_wcn3990_init(hu); } else { qcadev = serdev_device_get_drvdata(hu->serdev); - gpiod_set_value_cansleep(qcadev->bt_en, 1); - /* Controller needs time to bootup. */ - msleep(150); + if (qcadev->bt_en) { + gpiod_set_value_cansleep(qcadev->bt_en, 1); + /* Controller needs time to bootup. */ + msleep(150); + } } return ret; @@ -1717,7 +1752,7 @@ static void qca_power_shutdown(struct hci_uart *hu) host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else { + } else if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 0); } } @@ -1726,9 +1761,11 @@ static int qca_power_off(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; + enum qca_btsoc_type soc_type = qca_soc_type(hu); /* Stop sending shutdown command if soc crashes. */ - if (qca->memdump_state == QCA_MEMDUMP_IDLE) { + if (qca_is_wcn399x(soc_type) + && qca->memdump_state == QCA_MEMDUMP_IDLE) { qca_send_pre_shutdown_cmd(hdev); usleep_range(8000, 10000); } @@ -1755,7 +1792,11 @@ static int qca_regulator_enable(struct qca_serdev *qcadev) power->vregs_on = true; - return 0; + ret = clk_prepare_enable(qcadev->susclk); + if (ret) + qca_regulator_disable(qcadev); + + return ret; } static void qca_regulator_disable(struct qca_serdev *qcadev) @@ -1773,6 +1814,8 @@ static void qca_regulator_disable(struct qca_serdev *qcadev) regulator_bulk_disable(power->num_vregs, power->vreg_bulk); power->vregs_on = false; + + clk_disable_unprepare(qcadev->susclk); } static int qca_init_regulators(struct qca_power *qca, @@ -1811,6 +1854,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) struct hci_dev *hdev; const struct qca_vreg_data *data; int err; + bool power_ctrl_enabled = true; qcadev = devm_kzalloc(&serdev->dev, sizeof(*qcadev), GFP_KERNEL); if (!qcadev) @@ -1839,6 +1883,12 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_power->vregs_on = false; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); + if (IS_ERR(qcadev->susclk)) { + dev_err(&serdev->dev, "failed to acquire clk\n"); + return PTR_ERR(qcadev->susclk); + } + device_property_read_u32(&serdev->dev, "max-speed", &qcadev->oper_speed); if (!qcadev->oper_speed) @@ -1851,38 +1901,40 @@ static int qca_serdev_probe(struct serdev_device *serdev) } } else { qcadev->btsoc_type = QCA_ROME; - qcadev->bt_en = devm_gpiod_get(&serdev->dev, "enable", + qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR(qcadev->bt_en)) { - dev_err(&serdev->dev, "failed to acquire enable gpio\n"); - return PTR_ERR(qcadev->bt_en); + if (!qcadev->bt_en) { + dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); + power_ctrl_enabled = false; } - qcadev->susclk = devm_clk_get(&serdev->dev, NULL); - if (IS_ERR(qcadev->susclk)) { - dev_err(&serdev->dev, "failed to acquire clk\n"); - return PTR_ERR(qcadev->susclk); - } - - err = clk_set_rate(qcadev->susclk, SUSCLK_RATE_32KHZ); - if (err) - return err; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); + if (!qcadev->susclk) { + dev_warn(&serdev->dev, "failed to acquire clk\n"); + } else { + err = clk_set_rate(qcadev->susclk, SUSCLK_RATE_32KHZ); + if (err) + return err; - err = clk_prepare_enable(qcadev->susclk); - if (err) - return 
err; + err = clk_prepare_enable(qcadev->susclk); + if (err) + return err; + } err = hci_uart_register_device(&qcadev->serdev_hu, &qca_proto); if (err) { BT_ERR("Rome serdev registration failed"); - clk_disable_unprepare(qcadev->susclk); + if (qcadev->susclk) + clk_disable_unprepare(qcadev->susclk); return err; } } - hdev = qcadev->serdev_hu.hdev; - set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); - hdev->shutdown = qca_power_off; + if (power_ctrl_enabled) { + hdev = qcadev->serdev_hu.hdev; + set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + hdev->shutdown = qca_power_off; + } return 0; } @@ -1893,7 +1945,7 @@ static void qca_serdev_remove(struct serdev_device *serdev) if (qca_is_wcn399x(qcadev->btsoc_type)) qca_power_shutdown(&qcadev->serdev_hu); - else + else if (qcadev->susclk) clk_disable_unprepare(qcadev->susclk); hci_uart_unregister_device(&qcadev->serdev_hu); diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index d801fc204d19..4c40f2d51a54 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -36,6 +36,7 @@ struct sja1105_regs { u64 port_control; u64 rgu; u64 config; + u64 sgmii; u64 rmii_pll1; u64 ptp_control; u64 ptpclkval; @@ -159,6 +160,7 @@ typedef enum { XMII_MODE_MII = 0, XMII_MODE_RMII = 1, XMII_MODE_RGMII = 2, + XMII_MODE_SGMII = 3, } sja1105_phy_interface_t; typedef enum { diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index 9082e52b55e9..0fdc2d55fff6 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -660,6 +660,10 @@ int sja1105_clocking_setup_port(struct sja1105_private *priv, int port) case XMII_MODE_RGMII: rc = sja1105_rgmii_clocking_setup(priv, port, role); break; + case XMII_MODE_SGMII: + /* Nothing to do in the CGU for SGMII */ + rc = 0; + break; default: dev_err(dev, "Invalid interface mode specified: %d\n", phy_mode); diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index d8123288c572..afafe2ecf248 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -22,6 +22,7 @@ #include <linux/if_ether.h> #include <linux/dsa/8021q.h> #include "sja1105.h" +#include "sja1105_sgmii.h" #include "sja1105_tas.h" static void sja1105_hw_reset(struct gpio_desc *gpio, unsigned int pulse_len, @@ -135,6 +136,21 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) return 0; } +static bool sja1105_supports_sgmii(struct sja1105_private *priv, int port) +{ + if (priv->info->part_no != SJA1105R_PART_NO && + priv->info->part_no != SJA1105S_PART_NO) + return false; + + if (port != SJA1105_SGMII_PORT) + return false; + + if (dsa_is_unused_port(priv->ds, port)) + return false; + + return true; +} + static int sja1105_init_mii_settings(struct sja1105_private *priv, struct sja1105_dt_port *ports) { @@ -162,6 +178,9 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv, mii = table->entries; for (i = 0; i < SJA1105_NUM_PORTS; i++) { + if (dsa_is_unused_port(priv->ds, i)) + continue; + switch (ports[i].phy_mode) { case PHY_INTERFACE_MODE_MII: mii->xmii_mode[i] = XMII_MODE_MII; @@ -175,12 +194,24 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv, case PHY_INTERFACE_MODE_RGMII_TXID: mii->xmii_mode[i] = XMII_MODE_RGMII; break; + case PHY_INTERFACE_MODE_SGMII: + if (!sja1105_supports_sgmii(priv, i)) + return -EINVAL; + mii->xmii_mode[i] = XMII_MODE_SGMII; + break; default: 
dev_err(dev, "Unsupported PHY mode %s!\n", phy_modes(ports[i].phy_mode)); } - mii->phy_mac[i] = ports[i].role; + /* Even though the SerDes port is able to drive SGMII autoneg + * like a PHY would, from the perspective of the XMII tables, + * the SGMII port should always be put in MAC mode. + */ + if (ports[i].phy_mode == PHY_INTERFACE_MODE_SGMII) + mii->phy_mac[i] = XMII_MAC; + else + mii->phy_mac[i] = ports[i].role; } return 0; } @@ -647,6 +678,85 @@ static int sja1105_parse_dt(struct sja1105_private *priv, return rc; } +static int sja1105_sgmii_read(struct sja1105_private *priv, int pcs_reg) +{ + const struct sja1105_regs *regs = priv->info->regs; + u32 val; + int rc; + + rc = sja1105_xfer_u32(priv, SPI_READ, regs->sgmii + pcs_reg, &val, + NULL); + if (rc < 0) + return rc; + + return val; +} + +static int sja1105_sgmii_write(struct sja1105_private *priv, int pcs_reg, + u16 pcs_val) +{ + const struct sja1105_regs *regs = priv->info->regs; + u32 val = pcs_val; + int rc; + + rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->sgmii + pcs_reg, &val, + NULL); + if (rc < 0) + return rc; + + return val; +} + +static void sja1105_sgmii_pcs_config(struct sja1105_private *priv, + bool an_enabled, bool an_master) +{ + u16 ac = SJA1105_AC_AUTONEG_MODE_SGMII; + + /* DIGITAL_CONTROL_1: Enable vendor-specific MMD1, allow the PHY to + * stop the clock during LPI mode, make the MAC reconfigure + * autonomously after PCS autoneg is done, flush the internal FIFOs. + */ + sja1105_sgmii_write(priv, SJA1105_DC1, SJA1105_DC1_EN_VSMMD1 | + SJA1105_DC1_CLOCK_STOP_EN | + SJA1105_DC1_MAC_AUTO_SW | + SJA1105_DC1_INIT); + /* DIGITAL_CONTROL_2: No polarity inversion for TX and RX lanes */ + sja1105_sgmii_write(priv, SJA1105_DC2, SJA1105_DC2_TX_POL_INV_DISABLE); + /* AUTONEG_CONTROL: Use SGMII autoneg */ + if (an_master) + ac |= SJA1105_AC_PHY_MODE | SJA1105_AC_SGMII_LINK; + sja1105_sgmii_write(priv, SJA1105_AC, ac); + /* BASIC_CONTROL: enable in-band AN now, if requested. Otherwise, + * sja1105_sgmii_pcs_force_speed must be called later for the link + * to become operational. + */ + if (an_enabled) + sja1105_sgmii_write(priv, MII_BMCR, + BMCR_ANENABLE | BMCR_ANRESTART); +} + +static void sja1105_sgmii_pcs_force_speed(struct sja1105_private *priv, + int speed) +{ + int pcs_speed; + + switch (speed) { + case SPEED_1000: + pcs_speed = BMCR_SPEED1000; + break; + case SPEED_100: + pcs_speed = BMCR_SPEED100; + break; + case SPEED_10: + pcs_speed = BMCR_SPEED10; + break; + default: + dev_err(priv->ds->dev, "Invalid speed %d\n", speed); + return; + } + sja1105_sgmii_write(priv, MII_BMCR, pcs_speed | BMCR_FULLDPLX); +} + /* Convert link speed from SJA1105 to ethtool encoding */ static int sja1105_speed[] = { [SJA1105_SPEED_AUTO] = SPEED_UNKNOWN, @@ -704,8 +814,13 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port, * table, since this will be used for the clocking setup, and we no * longer need to store it in the static config (already told hardware * we want auto during upload phase). + * Actually for the SGMII port, the MAC is fixed at 1 Gbps and + * we need to configure the PCS only (if even that). 
*/ - mac[port].speed = speed; + if (sja1105_supports_sgmii(priv, port)) + mac[port].speed = SJA1105_SPEED_1000MBPS; + else + mac[port].speed = speed; /* Write to the dynamic reconfiguration tables */ rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, @@ -754,16 +869,19 @@ static bool sja1105_phy_mode_mismatch(struct sja1105_private *priv, int port, case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: return (phy_mode != XMII_MODE_RGMII); + case PHY_INTERFACE_MODE_SGMII: + return (phy_mode != XMII_MODE_SGMII); default: return true; } } static void sja1105_mac_config(struct dsa_switch *ds, int port, - unsigned int link_an_mode, + unsigned int mode, const struct phylink_link_state *state) { struct sja1105_private *priv = ds->priv; + bool is_sgmii = sja1105_supports_sgmii(priv, port); if (sja1105_phy_mode_mismatch(priv, port, state->interface)) { dev_err(ds->dev, "Changing PHY mode to %s not supported!\n", @@ -771,10 +889,14 @@ static void sja1105_mac_config(struct dsa_switch *ds, int port, return; } - if (link_an_mode == MLO_AN_INBAND) { + if (phylink_autoneg_inband(mode) && !is_sgmii) { dev_err(ds->dev, "In-band AN not supported!\n"); return; } + + if (is_sgmii) + sja1105_sgmii_pcs_config(priv, phylink_autoneg_inband(mode), + false); } static void sja1105_mac_link_down(struct dsa_switch *ds, int port, @@ -795,6 +917,9 @@ static void sja1105_mac_link_up(struct dsa_switch *ds, int port, sja1105_adjust_port_config(priv, port, speed); + if (sja1105_supports_sgmii(priv, port) && !phylink_autoneg_inband(mode)) + sja1105_sgmii_pcs_force_speed(priv, speed); + sja1105_inhibit_tx(priv, BIT(port), false); } @@ -830,7 +955,8 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port, phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Full); phylink_set(mask, 100baseT1_Full); - if (mii->xmii_mode[port] == XMII_MODE_RGMII) + if (mii->xmii_mode[port] == XMII_MODE_RGMII || + mii->xmii_mode[port] == XMII_MODE_SGMII) phylink_set(mask, 1000baseT_Full); bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -838,6 +964,38 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static int sja1105_mac_pcs_get_state(struct dsa_switch *ds, int port, + struct phylink_link_state *state) +{ + struct sja1105_private *priv = ds->priv; + int ais; + + /* Read the vendor-specific AUTONEG_INTR_STATUS register */ + ais = sja1105_sgmii_read(priv, SJA1105_AIS); + if (ais < 0) + return ais; + + switch (SJA1105_AIS_SPEED(ais)) { + case 0: + state->speed = SPEED_10; + break; + case 1: + state->speed = SPEED_100; + break; + case 2: + state->speed = SPEED_1000; + break; + default: + dev_err(ds->dev, "Invalid SGMII PCS speed %lu\n", + SJA1105_AIS_SPEED(ais)); + } + state->duplex = SJA1105_AIS_DUPLEX_MODE(ais); + state->an_complete = SJA1105_AIS_COMPLETE(ais); + state->link = SJA1105_AIS_LINK_STATUS(ais); + + return 0; +} + static int sja1105_find_static_fdb_entry(struct sja1105_private *priv, int port, const struct sja1105_l2_lookup_entry *requested) @@ -1364,6 +1522,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv, struct dsa_switch *ds = priv->ds; s64 t1, t2, t3, t4; s64 t12, t34; + u16 bmcr = 0; int rc, i; s64 now; @@ -1381,6 +1540,9 @@ int sja1105_static_config_reload(struct sja1105_private *priv, mac[i].speed = SJA1105_SPEED_AUTO; } + if (sja1105_supports_sgmii(priv, SJA1105_SGMII_PORT)) + bmcr = sja1105_sgmii_read(priv, MII_BMCR); + /* No PTP operations can run right now */ 
mutex_lock(&priv->ptp_data.lock); @@ -1430,6 +1592,25 @@ out_unlock_ptp: if (rc < 0) goto out; } + + if (sja1105_supports_sgmii(priv, SJA1105_SGMII_PORT)) { + bool an_enabled = !!(bmcr & BMCR_ANENABLE); + + sja1105_sgmii_pcs_config(priv, an_enabled, false); + + if (!an_enabled) { + int speed = SPEED_UNKNOWN; + + if (bmcr & BMCR_SPEED1000) + speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + speed = SPEED_100; + else if (bmcr & BMCR_SPEED10) + speed = SPEED_10; + + sja1105_sgmii_pcs_force_speed(priv, speed); + } + } out: mutex_unlock(&priv->mgmt_lock); @@ -1995,6 +2176,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .teardown = sja1105_teardown, .set_ageing_time = sja1105_set_ageing_time, .phylink_validate = sja1105_phylink_validate, + .phylink_mac_link_state = sja1105_mac_pcs_get_state, .phylink_mac_config = sja1105_mac_config, .phylink_mac_link_up = sja1105_mac_link_up, .phylink_mac_link_down = sja1105_mac_link_down, diff --git a/drivers/net/dsa/sja1105/sja1105_sgmii.h b/drivers/net/dsa/sja1105/sja1105_sgmii.h new file mode 100644 index 000000000000..24d9bc046e70 --- /dev/null +++ b/drivers/net/dsa/sja1105/sja1105_sgmii.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2020, NXP Semiconductors + */ +#ifndef _SJA1105_SGMII_H +#define _SJA1105_SGMII_H + +#define SJA1105_SGMII_PORT 4 + +/* DIGITAL_CONTROL_1 (address 1f8000h) */ +#define SJA1105_DC1 0x8000 +#define SJA1105_DC1_VS_RESET BIT(15) +#define SJA1105_DC1_REMOTE_LOOPBACK BIT(14) +#define SJA1105_DC1_EN_VSMMD1 BIT(13) +#define SJA1105_DC1_POWER_SAVE BIT(11) +#define SJA1105_DC1_CLOCK_STOP_EN BIT(10) +#define SJA1105_DC1_MAC_AUTO_SW BIT(9) +#define SJA1105_DC1_INIT BIT(8) +#define SJA1105_DC1_TX_DISABLE BIT(4) +#define SJA1105_DC1_AUTONEG_TIMER_OVRR BIT(3) +#define SJA1105_DC1_BYP_POWERUP BIT(1) +#define SJA1105_DC1_PHY_MODE_CONTROL BIT(0) + +/* DIGITAL_CONTROL_2 register (address 1f80E1h) */ +#define SJA1105_DC2 0x80e1 +#define SJA1105_DC2_TX_POL_INV_DISABLE BIT(4) +#define SJA1105_DC2_RX_POL_INV BIT(0) + +/* DIGITAL_ERROR_CNT register (address 1f80E2h) */ +#define SJA1105_DEC 0x80e2 +#define SJA1105_DEC_ICG_EC_ENA BIT(4) +#define SJA1105_DEC_CLEAR_ON_READ BIT(0) + +/* AUTONEG_CONTROL register (address 1f8001h) */ +#define SJA1105_AC 0x8001 +#define SJA1105_AC_MII_CONTROL BIT(8) +#define SJA1105_AC_SGMII_LINK BIT(4) +#define SJA1105_AC_PHY_MODE BIT(3) +#define SJA1105_AC_AUTONEG_MODE(x) (((x) << 1) & GENMASK(2, 1)) +#define SJA1105_AC_AUTONEG_MODE_SGMII SJA1105_AC_AUTONEG_MODE(2) + +/* AUTONEG_INTR_STATUS register (address 1f8002h) */ +#define SJA1105_AIS 0x8002 +#define SJA1105_AIS_LINK_STATUS(x) (!!((x) & BIT(4))) +#define SJA1105_AIS_SPEED(x) (((x) & GENMASK(3, 2)) >> 2) +#define SJA1105_AIS_DUPLEX_MODE(x) (!!((x) & BIT(1))) +#define SJA1105_AIS_COMPLETE(x) (!!((x) & BIT(0))) + +/* DEBUG_CONTROL register (address 1f8005h) */ +#define SJA1105_DC 0x8005 +#define SJA1105_DC_SUPPRESS_LOS BIT(4) +#define SJA1105_DC_RESTART_SYNC BIT(0) + +#endif diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 29b127f3bf9c..45da162ba268 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -474,6 +474,7 @@ static struct sja1105_regs sja1105pqrs_regs = { /* UM10944.pdf, Table 86, ACU Register overview */ .pad_mii_tx = {0x100800, 0x100802, 0x100804, 0x100806, 0x100808}, .pad_mii_id = {0x100810, 0x100811, 0x100812, 0x100813, 0x100814}, + .sgmii = 0x1F0000, .rmii_pll1 = 0x10000A, .cgu_idiv = {0x10000B, 0x10000C, 0x10000D, 
0x10000E, 0x10000F}, .mac = {0x200, 0x202, 0x204, 0x206, 0x208}, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 523bf4be43cc..b19be7549aad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -300,7 +300,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, return -EINVAL; } - if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a8d9ec927627..66d31c018c7e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1375,7 +1375,6 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) { u32 dev_id, rev_id; int ret = 1; - char *s; pci_read_config_dword(oct->pci_dev, 0, &dev_id); pci_read_config_dword(oct->pci_dev, 8, &rev_id); @@ -1385,13 +1384,11 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) case OCTEON_CN68XX_PCIID: oct->chip_id = OCTEON_CN68XX; ret = lio_setup_cn68xx_octeon_device(oct); - s = "CN68XX"; break; case OCTEON_CN66XX_PCIID: oct->chip_id = OCTEON_CN66XX; ret = lio_setup_cn66xx_octeon_device(oct); - s = "CN66XX"; break; case OCTEON_CN23XX_PCIID_PF: @@ -1404,11 +1401,9 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) pci_sriov_set_totalvfs(oct->pci_dev, oct->sriov_info.max_vfs); #endif - s = "CN23XX"; break; default: - s = "?"; dev_err(&oct->pci_dev->dev, "Unknown device found (dev_id: %x)\n", dev_id); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 2a2938bbb93a..e8852dfcc1f1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -438,13 +438,118 @@ int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, return get_filter_count(adapter, fidx, hitcnt, bytecnt, hash); } -int cxgb4_get_free_ftid(struct net_device *dev, int family) +static bool cxgb4_filter_prio_in_range(struct tid_info *t, u32 idx, u8 nslots, + u32 prio) +{ + struct filter_entry *prev_tab, *next_tab, *prev_fe, *next_fe; + u32 prev_ftid, next_ftid; + + /* Only insert the rule if both of the following conditions + * are met: + * 1. The immediate previous rule has priority <= @prio. + * 2. The immediate next rule has priority >= @prio. + */ + + /* High Priority (HPFILTER) region always has higher priority + * than normal FILTER region. So, all rules in HPFILTER region + * must have prio value <= rules in normal FILTER region. + */ + if (idx < t->nhpftids) { + /* Don't insert if there's a rule already present at @idx + * in HPFILTER region. + */ + if (test_bit(idx, t->hpftid_bmap)) + return false; + + next_tab = t->hpftid_tab; + next_ftid = find_next_bit(t->hpftid_bmap, t->nhpftids, idx); + if (next_ftid >= t->nhpftids) { + /* No next entry found in HPFILTER region. + * See if there's any next entry in normal + * FILTER region. + */ + next_ftid = find_first_bit(t->ftid_bmap, t->nftids); + if (next_ftid >= t->nftids) + next_ftid = idx; + else + next_tab = t->ftid_tab; + } + + /* Search for the closest previous filter entry in HPFILTER + * region. 
No need to search in normal FILTER region because + * there can never be any entry in normal FILTER region whose + * prio value is < last entry in HPFILTER region. + */ + prev_ftid = find_last_bit(t->hpftid_bmap, idx); + if (prev_ftid >= idx) + prev_ftid = idx; + + prev_tab = t->hpftid_tab; + } else { + idx -= t->nhpftids; + + /* Don't insert if there's a rule already present at @idx + * in normal FILTER region. + */ + if (test_bit(idx, t->ftid_bmap)) + return false; + + prev_tab = t->ftid_tab; + prev_ftid = find_last_bit(t->ftid_bmap, idx); + if (prev_ftid >= idx) { + /* No previous entry found in normal FILTER + * region. See if there's any previous entry + * in HPFILTER region. + */ + prev_ftid = find_last_bit(t->hpftid_bmap, t->nhpftids); + if (prev_ftid >= t->nhpftids) + prev_ftid = idx; + else + prev_tab = t->hpftid_tab; + } + + /* Search for the closest next filter entry in normal + * FILTER region. No need to search in HPFILTER region + * because there can never be any entry in HPFILTER + * region whose prio value is > first entry in normal + * FILTER region. + */ + next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx); + if (next_ftid >= t->nftids) + next_ftid = idx; + + next_tab = t->ftid_tab; + } + + next_fe = &next_tab[next_ftid]; + + /* See if the filter entry belongs to an IPv6 rule, which + * occupy 4 slots on T5 and 2 slots on T6. Adjust the + * reference to the previously inserted filter entry + * accordingly. + */ + prev_fe = &prev_tab[prev_ftid & ~(nslots - 1)]; + if (!prev_fe->fs.type) + prev_fe = &prev_tab[prev_ftid]; + + if ((prev_fe->valid && prev_fe->fs.tc_prio > prio) || + (next_fe->valid && next_fe->fs.tc_prio < prio)) + return false; + + return true; +} + +int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en, + u32 tc_prio) { struct adapter *adap = netdev2adap(dev); struct tid_info *t = &adap->tids; + struct filter_entry *tab, *f; + u32 bmap_ftid, max_ftid; + unsigned long *bmap; bool found = false; - u8 i, n, cnt; - int ftid; + u8 i, cnt, n; + int ftid = 0; /* IPv4 occupy 1 slot. IPv6 occupy 2 slots on T6 and 4 slots * on T5. @@ -456,34 +561,129 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) n += 2; } - if (n > t->nftids) - return -ENOMEM; - - /* Find free filter slots from the end of TCAM. Appropriate - * checks must be done by caller later to ensure the prio - * passed by TC doesn't conflict with prio saved by existing - * rules in the TCAM. + /* There are 3 filter regions available in hardware in + * following order of priority: + * + * 1. High Priority (HPFILTER) region (Highest Priority). + * 2. HASH region. + * 3. Normal FILTER region (Lowest Priority). + * + * Entries in HPFILTER and normal FILTER region have index + * 0 as the highest priority and the rules will be scanned + * in ascending order until either a rule hits or end of + * the region is reached. + * + * All HASH region entries have same priority. The set of + * fields to match in headers are pre-determined. The same + * set of header match fields must be compulsorily specified + * in all the rules wanting to get inserted in HASH region. + * Hence, HASH region is an exact-match region. A HASH is + * generated for a rule based on the values in the + * pre-determined set of header match fields. The generated + * HASH serves as an index into the HASH region. There can + * never be 2 rules having the same HASH. 
Hardware will + * compute a HASH for every incoming packet based on the + * values in the pre-determined set of header match fields + * and uses it as an index to check if there's a rule + * inserted in the HASH region at the specified index. If + * there's a rule inserted, then it's considered as a filter + * hit. Otherwise, it's a filter miss and normal FILTER region + * is scanned afterwards. */ + spin_lock_bh(&t->ftid_lock); - ftid = t->nftids - 1; - while (ftid >= n - 1) { + + ftid = (tc_prio <= t->nhpftids) ? 0 : t->nhpftids; + max_ftid = t->nftids + t->nhpftids; + while (ftid < max_ftid) { + if (ftid < t->nhpftids) { + /* If the new rule wants to get inserted into + * HPFILTER region, but its prio is greater + * than the rule with the highest prio in HASH + * region, then reject the rule. + */ + if (t->tc_hash_tids_max_prio && + tc_prio > t->tc_hash_tids_max_prio) + break; + + /* If there's not enough slots available + * in HPFILTER region, then move on to + * normal FILTER region immediately. + */ + if (ftid + n > t->nhpftids) { + ftid = t->nhpftids; + continue; + } + + bmap = t->hpftid_bmap; + bmap_ftid = ftid; + tab = t->hpftid_tab; + } else if (hash_en) { + /* Ensure priority is >= last rule in HPFILTER + * region. + */ + ftid = find_last_bit(t->hpftid_bmap, t->nhpftids); + if (ftid < t->nhpftids) { + f = &t->hpftid_tab[ftid]; + if (f->valid && tc_prio < f->fs.tc_prio) + break; + } + + /* Ensure priority is <= first rule in normal + * FILTER region. + */ + ftid = find_first_bit(t->ftid_bmap, t->nftids); + if (ftid < t->nftids) { + f = &t->ftid_tab[ftid]; + if (f->valid && tc_prio > f->fs.tc_prio) + break; + } + + found = true; + ftid = t->nhpftids; + goto out_unlock; + } else { + /* If the new rule wants to get inserted into + * normal FILTER region, but its prio is less + * than the rule with the highest prio in HASH + * region, then reject the rule. + */ + if (t->tc_hash_tids_max_prio && + tc_prio < t->tc_hash_tids_max_prio) + break; + + if (ftid + n > max_ftid) + break; + + bmap = t->ftid_bmap; + bmap_ftid = ftid - t->nhpftids; + tab = t->ftid_tab; + } + cnt = 0; for (i = 0; i < n; i++) { - if (test_bit(ftid - i, t->ftid_bmap)) + if (test_bit(bmap_ftid + i, bmap)) break; cnt++; } + if (cnt == n) { - ftid &= ~(n - 1); - found = true; - break; + /* Ensure the new rule's prio doesn't conflict + * with existing rules. + */ + if (cxgb4_filter_prio_in_range(t, ftid, n, + tc_prio)) { + ftid &= ~(n - 1); + found = true; + break; + } } - ftid -= n; + ftid += n; } - spin_unlock_bh(&t->ftid_lock); - ftid += t->nhpftids; +out_unlock: + spin_unlock_bh(&t->ftid_lock); return found ? ftid : -ENOMEM; } @@ -555,73 +755,6 @@ static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family) spin_unlock_bh(&t->ftid_lock); } -bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) -{ - struct filter_entry *prev_fe, *next_fe, *tab; - struct adapter *adap = netdev2adap(dev); - u32 prev_ftid, next_ftid, max_tid; - struct tid_info *t = &adap->tids; - unsigned long *bmap; - bool valid = true; - - if (idx < t->nhpftids) { - bmap = t->hpftid_bmap; - tab = t->hpftid_tab; - max_tid = t->nhpftids; - } else { - idx -= t->nhpftids; - bmap = t->ftid_bmap; - tab = t->ftid_tab; - max_tid = t->nftids; - } - - /* Only insert the rule if both of the following conditions - * are met: - * 1. The immediate previous rule has priority <= @prio. - * 2. The immediate next rule has priority >= @prio. 
- */ - spin_lock_bh(&t->ftid_lock); - - /* Don't insert if there's a rule already present at @idx. */ - if (test_bit(idx, bmap)) { - valid = false; - goto out_unlock; - } - - next_ftid = find_next_bit(bmap, max_tid, idx); - if (next_ftid >= max_tid) - next_ftid = idx; - - next_fe = &tab[next_ftid]; - - prev_ftid = find_last_bit(bmap, idx); - if (prev_ftid >= idx) - prev_ftid = idx; - - /* See if the filter entry belongs to an IPv6 rule, which - * occupy 4 slots on T5 and 2 slots on T6. Adjust the - * reference to the previously inserted filter entry - * accordingly. - */ - if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) { - prev_fe = &tab[prev_ftid & ~0x3]; - if (!prev_fe->fs.type) - prev_fe = &tab[prev_ftid]; - } else { - prev_fe = &tab[prev_ftid & ~0x1]; - if (!prev_fe->fs.type) - prev_fe = &tab[prev_ftid]; - } - - if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) || - (next_fe->valid && prio > next_fe->fs.tc_prio)) - valid = false; - -out_unlock: - spin_unlock_bh(&t->ftid_lock); - return valid; -} - /* Delete the filter at a specified index. */ static int del_filter_wr(struct adapter *adapter, int fidx) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index b3e4a645043d..b0751c0611ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -53,5 +53,4 @@ void clear_all_filters(struct adapter *adapter); void init_hash_filter(struct adapter *adap); bool is_filter_exact_match(struct adapter *adap, struct ch_filter_specification *fs); -bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index cc46277e98de..aec9b90313e7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -553,7 +553,7 @@ int cxgb4_validate_flow_actions(struct net_device *dev, bool act_vlan = false; int i; - if (!flow_action_basic_hw_stats_types_check(actions, extack)) + if (!flow_action_basic_hw_stats_check(actions, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, actions) { @@ -635,6 +635,64 @@ int cxgb4_validate_flow_actions(struct net_device *dev, return 0; } +static void cxgb4_tc_flower_hash_prio_add(struct adapter *adap, u32 tc_prio) +{ + spin_lock_bh(&adap->tids.ftid_lock); + if (adap->tids.tc_hash_tids_max_prio < tc_prio) + adap->tids.tc_hash_tids_max_prio = tc_prio; + spin_unlock_bh(&adap->tids.ftid_lock); +} + +static void cxgb4_tc_flower_hash_prio_del(struct adapter *adap, u32 tc_prio) +{ + struct tid_info *t = &adap->tids; + struct ch_tc_flower_entry *fe; + struct rhashtable_iter iter; + u32 found = 0; + + spin_lock_bh(&t->ftid_lock); + /* Bail if the current rule is not the one with the max + * prio. + */ + if (t->tc_hash_tids_max_prio != tc_prio) + goto out_unlock; + + /* Search for the next rule having the same or next lower + * max prio. + */ + rhashtable_walk_enter(&adap->flower_tbl, &iter); + do { + rhashtable_walk_start(&iter); + + fe = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(fe)) { + if (fe->fs.hash && + fe->fs.tc_prio <= t->tc_hash_tids_max_prio) { + t->tc_hash_tids_max_prio = fe->fs.tc_prio; + found++; + + /* Bail if we found another rule + * having the same prio as the + * current max one. 
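The rescan below walks the flower rhashtable using the kernel's standard retry idiom: a concurrent resize makes rhashtable_walk_next() return ERR_PTR(-EAGAIN), so the whole start/next/stop cycle is repeated. The control-flow skeleton in isolation (types elided; this is a sketch of the idiom, not compilable on its own):

	rhashtable_walk_enter(&table, &iter);
	do {
		rhashtable_walk_start(&iter);

		obj = rhashtable_walk_next(&iter);
		while (!IS_ERR_OR_NULL(obj)) {
			/* inspect obj under the walk */
			obj = rhashtable_walk_next(&iter);
		}

		rhashtable_walk_stop(&iter);
	} while (obj == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);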
+ */ + if (fe->fs.tc_prio == tc_prio) + break; + } + + fe = rhashtable_walk_next(&iter); + } + + rhashtable_walk_stop(&iter); + } while (fe == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + + if (!found) + t->tc_hash_tids_max_prio = 0; + +out_unlock: + spin_unlock_bh(&t->ftid_lock); +} + int cxgb4_tc_flower_replace(struct net_device *dev, struct flow_cls_offload *cls) { @@ -644,6 +702,7 @@ int cxgb4_tc_flower_replace(struct net_device *dev, struct ch_tc_flower_entry *ch_flower; struct ch_filter_specification *fs; struct filter_ctx ctx; + u8 inet_family; int fidx, ret; if (cxgb4_validate_flow_actions(dev, &rule->action, extack)) @@ -664,39 +723,32 @@ int cxgb4_tc_flower_replace(struct net_device *dev, cxgb4_process_flow_actions(dev, &rule->action, fs); fs->hash = is_filter_exact_match(adap, fs); - if (fs->hash) { - fidx = 0; - } else { - u8 inet_family; + inet_family = fs->type ? PF_INET6 : PF_INET; - inet_family = fs->type ? PF_INET6 : PF_INET; - - /* Note that TC uses prio 0 to indicate stack to - * generate automatic prio and hence doesn't pass prio - * 0 to driver. However, the hardware TCAM index - * starts from 0. Hence, the -1 here. - */ - if (cls->common.prio <= (adap->tids.nftids + - adap->tids.nhpftids)) { - fidx = cls->common.prio - 1; - if (fidx < adap->tids.nhpftids) - fs->prio = 1; - } else { - fidx = cxgb4_get_free_ftid(dev, inet_family); - } + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. + */ + fidx = cxgb4_get_free_ftid(dev, inet_family, fs->hash, + cls->common.prio); + if (fidx < 0) { + NL_SET_ERR_MSG_MOD(extack, + "No free LETCAM index available"); + ret = -ENOMEM; + goto free_entry; + } - /* Only insert FLOWER rule if its priority doesn't - * conflict with existing rules in the LETCAM. - */ - if (fidx < 0 || - !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) { - NL_SET_ERR_MSG_MOD(extack, - "No free LETCAM index available"); - ret = -ENOMEM; - goto free_entry; - } + if (fidx < adap->tids.nhpftids) { + fs->prio = 1; + fs->hash = 0; } + /* If the rule can be inserted into HASH region, then ignore + * the index to normal FILTER region. 
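Taken together, the replace path reduces to a small post-allocation fixup: an index inside the HP range forces a TCAM insertion at high priority, and a surviving HASH candidate ignores the TCAM index entirely. A self-contained model with toy types (not the driver's structures):

#include <stdbool.h>
#include <stdint.h>

struct toy_fs {
	bool hash;	/* exact-match (HASH region) candidate */
	bool hi_prio;	/* lands in the HPFILTER region */
	int fidx;	/* TCAM index; meaningless for HASH rules */
};

static void toy_fixup_region(struct toy_fs *fs, int fidx, uint32_t nhpftids)
{
	fs->fidx = fidx;
	if (fidx >= 0 && (uint32_t)fidx < nhpftids) {
		/* An HP index wins: force TCAM insertion, drop HASH. */
		fs->hi_prio = true;
		fs->hash = false;
	}
	if (fs->hash)
		fs->fidx = 0;	/* HASH rules ignore the TCAM index */
}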
+ */ + if (fs->hash) + fidx = 0; + fs->tc_prio = cls->common.prio; fs->tc_cookie = cls->cookie; @@ -727,6 +779,9 @@ int cxgb4_tc_flower_replace(struct net_device *dev, if (ret) goto del_filter; + if (fs->hash) + cxgb4_tc_flower_hash_prio_add(adap, cls->common.prio); + return 0; del_filter: @@ -742,12 +797,17 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_entry *ch_flower; + u32 tc_prio; + bool hash; int ret; ch_flower = ch_flower_lookup(adap, cls->cookie); if (!ch_flower) return -ENOENT; + hash = ch_flower->fs.hash; + tc_prio = ch_flower->fs.tc_prio; + ret = cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); if (ret) goto err; @@ -760,6 +820,9 @@ int cxgb4_tc_flower_destroy(struct net_device *dev, } kfree_rcu(ch_flower, rcu); + if (hash) + cxgb4_tc_flower_hash_prio_del(adap, tc_prio); + err: return ret; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index d80dee4d316d..8a5ae8bc9b7d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -198,22 +198,14 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, struct ch_filter_specification *fs; int ret, fidx; - /* Note that TC uses prio 0 to indicate stack to generate - * automatic prio and hence doesn't pass prio 0 to driver. - * However, the hardware TCAM index starts from 0. Hence, the - * -1 here. 1 slot is enough to create a wildcard matchall - * VIID rule. + /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. + * + * 1 slot is enough to create a wildcard matchall VIID rule. */ - if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids)) - fidx = cls->common.prio - 1; - else - fidx = cxgb4_get_free_ftid(dev, PF_INET); - - /* Only insert MATCHALL rule if its priority doesn't conflict - * with existing rules in the LETCAM. - */ - if (fidx < 0 || - !cxgb4_filter_prio_in_range(dev, fidx, cls->common.prio)) { + fidx = cxgb4_get_free_ftid(dev, PF_INET, false, cls->common.prio); + if (fidx < 0) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); return -ENOMEM; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 269b8d9e25e0..3f3c11e54d97 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -155,9 +155,10 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) struct ch_filter_specification fs; struct cxgb4_tc_u32_table *t; struct cxgb4_link *link; - unsigned int filter_id; u32 uhtid, link_uhtid; bool is_ipv6 = false; + u8 inet_family; + int filter_id; int ret; if (!can_tc_u32_offload(dev)) @@ -166,18 +167,15 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) return -EOPNOTSUPP; - /* Note that TC uses prio 0 to indicate stack to generate - * automatic prio and hence doesn't pass prio 0 to driver. - * However, the hardware TCAM index starts from 0. Hence, the - * -1 here. - */ - filter_id = TC_U32_NODE(cls->knode.handle) - 1; + inet_family = (protocol == htons(ETH_P_IPV6)) ? PF_INET6 : PF_INET; - /* Only insert U32 rule if its priority doesn't conflict with - * existing rules in the LETCAM. 
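The u32 conversion below keys the allocation on the node id of the knode handle instead of a raw TC priority. For reference, a u32 handle packs htid/hash/node fields; these standalone macros mirror the kernel's TC_U32_NODE()/TC_U32_HTID() definitions from pkt_cls.h, reproduced here for illustration:

#include <stdint.h>

#define TOY_U32_HTID(h)	((h) & 0xFFF00000u)	/* hash table id, high 12 bits */
#define TOY_U32_NODE(h)	((h) & 0xFFFu)		/* node id, low 12 bits */

/* The driver passes the node id as the tc_prio argument of
 * cxgb4_get_free_ftid(), so knodes keep their relative ordering.
 */
static uint32_t toy_knode_alloc_key(uint32_t handle)
{
	return TOY_U32_NODE(handle);
}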
+ /* Get a free filter entry TID, where we can insert this new + * rule. Only insert rule if its prio doesn't conflict with + * existing rules. */ - if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids || - !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) { + filter_id = cxgb4_get_free_ftid(dev, inet_family, false, + TC_U32_NODE(cls->knode.handle)); + if (filter_id < 0) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); return -ENOMEM; @@ -358,23 +356,65 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) struct cxgb4_link *link = NULL; struct cxgb4_tc_u32_table *t; struct filter_entry *f; + bool found = false; u32 handle, uhtid; + u8 nslots; int ret; if (!can_tc_u32_offload(dev)) return -EOPNOTSUPP; /* Fetch the location to delete the filter. */ - filter_id = TC_U32_NODE(cls->knode.handle) - 1; - if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids) - return -ERANGE; + max_tids = adapter->tids.nhpftids + adapter->tids.nftids; + + spin_lock_bh(&adapter->tids.ftid_lock); + filter_id = 0; + while (filter_id < max_tids) { + if (filter_id < adapter->tids.nhpftids) { + i = filter_id; + f = &adapter->tids.hpftid_tab[i]; + if (f->valid && f->fs.tc_cookie == cls->knode.handle) { + found = true; + break; + } - if (filter_id < adapter->tids.nhpftids) - f = &adapter->tids.hpftid_tab[filter_id]; - else - f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + i = find_next_bit(adapter->tids.hpftid_bmap, + adapter->tids.nhpftids, i + 1); + if (i >= adapter->tids.nhpftids) { + filter_id = adapter->tids.nhpftids; + continue; + } + + filter_id = i; + } else { + i = filter_id - adapter->tids.nhpftids; + f = &adapter->tids.ftid_tab[i]; + if (f->valid && f->fs.tc_cookie == cls->knode.handle) { + found = true; + break; + } + + i = find_next_bit(adapter->tids.ftid_bmap, + adapter->tids.nftids, i + 1); + if (i >= adapter->tids.nftids) + break; + + filter_id = i + adapter->tids.nhpftids; + } + + nslots = 0; + if (f->fs.type) { + nslots++; + if (CHELSIO_CHIP_VERSION(adapter->params.chip) < + CHELSIO_T6) + nslots += 2; + } + + filter_id += nslots; + } + spin_unlock_bh(&adapter->tids.ftid_lock); - if (cls->knode.handle != f->fs.tc_cookie) + if (!found) return -ERANGE; t = adapter->tc_u32; @@ -407,7 +447,6 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* If a link is being deleted, then delete all filters * associated with the link. 
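The delete scan above advances in units of each entry's slot footprint, following the width rule spelled out in the comments. In isolation (self-contained; names hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* IPv4 filters take one LETCAM slot; IPv6 filters take two slots on T6
 * and four on the older T5 generation.
 */
static uint8_t toy_filter_slot_width(bool is_ipv6, bool is_t5)
{
	if (!is_ipv6)
		return 1;
	return is_t5 ? 4 : 2;
}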
*/ - max_tids = adapter->tids.nftids; for (i = 0; i < t->size; i++) { link = &t->table[i]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 03b9bdc812cc..be831317520a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -149,6 +149,8 @@ struct tid_info { atomic_t conns_in_use; /* lock for setting/clearing filter bitmap */ spinlock_t ftid_lock; + + unsigned int tc_hash_tids_max_prio; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -263,7 +265,8 @@ struct filter_ctx { struct ch_filter_specification; -int cxgb4_get_free_ftid(struct net_device *dev, int family); +int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en, + u32 tc_prio); int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs, struct filter_ctx *ctx); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 0a0c6ec2336c..8972cdd559e8 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -1082,7 +1082,7 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port, u8 qh, ql, pmap; int index, ctx; - if (!flow_action_basic_hw_stats_types_check(&rule->flow->action, NULL)) + if (!flow_action_basic_hw_stats_check(&rule->flow->action, NULL)) return -EOPNOTSUPP; memset(&c2, 0, sizeof(c2)); @@ -1308,7 +1308,7 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule) struct flow_rule *flow = rule->flow; struct flow_action_entry *act; - if (!flow_action_basic_hw_stats_types_check(&rule->flow->action, NULL)) + if (!flow_action_basic_hw_stats_check(&rule->flow->action, NULL)) return -EOPNOTSUPP; act = &flow->action.entries[0]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index e38495e4aa42..f8b2de4b04be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -3,20 +3,14 @@ #include "en/devlink.h" -int mlx5e_devlink_port_register(struct net_device *netdev) +int mlx5e_devlink_port_register(struct mlx5e_priv *priv) { - struct mlx5_core_dev *dev; - struct mlx5e_priv *priv; - struct devlink *devlink; - int err; + struct devlink *devlink = priv_to_devlink(priv->mdev); - priv = netdev_priv(netdev); - dev = priv->mdev; - - if (mlx5_core_is_pf(dev)) + if (mlx5_core_is_pf(priv->mdev)) devlink_port_attrs_set(&priv->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - PCI_FUNC(dev->pdev->devfn), + PCI_FUNC(priv->mdev->pdev->devfn), false, 0, NULL, 0); else @@ -24,12 +18,12 @@ int mlx5e_devlink_port_register(struct net_device *netdev) DEVLINK_PORT_FLAVOUR_VIRTUAL, 0, false, 0, NULL, 0); - devlink = priv_to_devlink(dev); - err = devlink_port_register(devlink, &priv->dl_port, 1); - if (err) - return err; - devlink_port_type_eth_set(&priv->dl_port, netdev); - return 0; + return devlink_port_register(devlink, &priv->dl_port, 1); +} + +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) +{ + devlink_port_type_eth_set(&priv->dl_port, priv->netdev); } void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h index 3e5393a0901f..83123a801adc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -7,8 +7,9 @@ #include <net/devlink.h> #include "en.h" -int mlx5e_devlink_port_register(struct net_device *dev); +int mlx5e_devlink_port_register(struct mlx5e_priv *priv); void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); +void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv); struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 956d9ddcdeed..a22ad6b90847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -484,19 +484,23 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; struct mlx5_esw_flow_attr *attr = &zone_rule->attr; struct mlx5_eswitch *esw = ct_priv->esw; - struct mlx5_flow_spec spec = {}; + struct mlx5_flow_spec *spec = NULL; u32 tupleid = 1; int err; zone_rule->nat = nat; + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + /* Get tuple unique id */ err = idr_alloc_u32(&ct_priv->tuple_ids, zone_rule, &tupleid, TUPLE_ID_MAX, GFP_KERNEL); if (err) { netdev_warn(ct_priv->netdev, "Failed to allocate tuple id, err: %d\n", err); - return err; + goto err_idr_alloc; } zone_rule->tupleid = tupleid; @@ -517,18 +521,19 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->counter = entry->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; - mlx5_tc_ct_set_tuple_match(&spec, flow_rule); - mlx5e_tc_match_to_reg_match(&spec, ZONE_TO_REG, + mlx5_tc_ct_set_tuple_match(spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->zone & MLX5_CT_ZONE_MASK, MLX5_CT_ZONE_MASK); - zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, &spec, attr); + zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(zone_rule->rule)) { err = PTR_ERR(zone_rule->rule); ct_dbg("Failed to add ct entry rule, nat: %d", nat); goto err_rule; } + kfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->zone); return 0; @@ -537,6 +542,8 @@ err_rule: mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); err_mod_hdr: idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid); +err_idr_alloc: + kfree(spec); return err; } @@ -696,7 +703,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, { struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct flow_dissector_key_ct *mask, *key; - bool trk, est, untrk, unest, new, unnew; + bool trk, est, untrk, unest, new; u32 ctstate = 0, ctstate_mask = 0; u16 ct_state_on, ct_state_off; u16 ct_state, ct_state_mask; @@ -739,7 +746,6 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; - unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW; unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; ctstate |= trk ? 
MLX5_CT_STATE_TRK_BIT : 0; @@ -885,8 +891,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_flow_spec *post_ct_spec = NULL; struct mlx5_eswitch *esw = ct_priv->esw; - struct mlx5_flow_spec post_ct_spec = {}; struct mlx5_esw_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; @@ -895,9 +901,13 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, struct mlx5_ct_ft *ft; u32 fte_id = 1; + post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!ct_flow) + if (!post_ct_spec || !ct_flow) { + kfree(post_ct_spec); + kfree(ct_flow); return -ENOMEM; + } /* Register for CT established events */ ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, @@ -992,7 +1002,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, /* Post ct rule matches on fte_id and executes original rule's * tc rule action */ - mlx5e_tc_match_to_reg_match(&post_ct_spec, FTEID_TO_REG, + mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, fte_id, MLX5_FTE_ID_MASK); /* Put post_ct rule on post_ct fdb */ @@ -1003,7 +1013,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); - rule = mlx5_eswitch_add_offloaded_rule(esw, &post_ct_spec, + rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, &ct_flow->post_ct_attr); ct_flow->post_ct_rule = rule; if (IS_ERR(ct_flow->post_ct_rule)) { @@ -1027,6 +1037,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, attr->ct_attr.ct_flow = ct_flow; *flow_rule = ct_flow->post_ct_rule; dealloc_mod_hdr_actions(&pre_mod_acts); + kfree(post_ct_spec); return 0; @@ -1043,6 +1054,7 @@ err_get_chain: err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: + kfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 6b2c893372da..091d305b633e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -8,6 +8,8 @@ #include <linux/mlx5/fs.h> #include <net/tc_act/tc_ct.h> +#include "en.h" + struct mlx5_esw_flow_attr; struct mlx5e_tc_mod_hdr_acts; struct mlx5_rep_uplink_priv; @@ -128,6 +130,11 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, struct flow_cls_offload *f, struct netlink_ext_ack *extack) { + if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); return -EOPNOTSUPP; } @@ -137,6 +144,8 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + netdev_warn(priv->netdev, "mlx5 tc ct offload isn't enabled.\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 608d0e07c308..b45c3f46570b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -66,6 
+66,9 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) return -EOPNOTSUPP; + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev) + return -EOPNOTSUPP; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f9c928afec89..be20d2247594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5467,25 +5467,27 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) goto err_destroy_netdev; } - err = register_netdev(netdev); + err = mlx5e_devlink_port_register(priv); if (err) { - mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); goto err_detach; } - err = mlx5e_devlink_port_register(netdev); + err = register_netdev(netdev); if (err) { - mlx5_core_err(mdev, "mlx5e_devlink_phy_port_register failed, %d\n", err); - goto err_unregister_netdev; + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_devlink_port_unregister; } + mlx5e_devlink_port_type_eth_set(priv); + #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_init_app(priv); #endif return priv; -err_unregister_netdev: - unregister_netdev(netdev); +err_devlink_port_unregister: + mlx5e_devlink_port_unregister(priv); err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 044891a03be3..901f88a886c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1985,11 +1985,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, *match_inner = !needs_mapping; if ((needs_mapping || sets_mapping) && - !mlx5_eswitch_vport_match_metadata_enabled(esw)) { + !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { NL_SET_ERR_MSG(extack, - "Chains on tunnel devices isn't supported without register metadata support"); + "Chains on tunnel devices isn't supported without register loopback support"); netdev_warn(priv->netdev, - "Chains on tunnel devices isn't supported without register metadata support"); + "Chains on tunnel devices isn't supported without register loopback support"); return -EOPNOTSUPP; } @@ -3044,8 +3044,7 @@ static bool actions_match_supported(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - struct net_device *filter_dev = parse_attr->filter_dev; - bool drop_action, pop_action, ct_flow; + bool ct_flow; u32 actions; ct_flow = flow_flag_test(flow, CT); @@ -3064,18 +3063,6 @@ static bool actions_match_supported(struct mlx5e_priv *priv, actions = flow->nic_attr->action; } - drop_action = actions & MLX5_FLOW_CONTEXT_ACTION_DROP; - pop_action = actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - - if (flow_flag_test(flow, EGRESS) && !drop_action) { - /* We only support filters on tunnel device, or on vlan - * devices if they have pop/drop action - */ - if (!mlx5e_get_tc_tun(filter_dev) || - (is_vlan_dev(filter_dev) && !pop_action)) - return false; - } - if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, flow_action, actions, @@ -3180,8 +3167,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (!flow_action_has_entries(flow_action)) return -EINVAL; - if (!flow_action_hw_stats_types_check(flow_action, extack, - FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT)) + if 
(!flow_action_hw_stats_check(flow_action, extack,
+					FLOW_ACTION_HW_STATS_DELAYED_BIT))
 		return -EOPNOTSUPP;

 	attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
@@ -3654,6 +3641,46 @@ static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw,
 	return 0;
 }

+static int verify_uplink_forwarding(struct mlx5e_priv *priv,
+				    struct mlx5e_tc_flow *flow,
+				    struct net_device *out_dev,
+				    struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	struct mlx5e_rep_priv *rep_priv;
+
+	/* Forwarding non-encapsulated traffic between
+	 * uplink ports is allowed only if the
+	 * termination_table_raw_traffic cap is set.
+	 *
+	 * The input vport was stored in esw_attr->in_rep.
+	 * In the LAG case, *priv* is the private data of
+	 * the uplink, which may not be the input vport.
+	 */
+	rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
+
+	if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+	      mlx5e_eswitch_uplink_rep(out_dev)))
+		return 0;
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+					termination_table_raw_traffic)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "devices are both uplink, can't offload forwarding");
+		pr_err("devices %s %s are both uplink, can't offload forwarding\n",
+		       priv->netdev->name, out_dev->name);
+		return -EOPNOTSUPP;
+	} else if (out_dev != rep_priv->netdev) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "devices are not the same uplink, can't offload forwarding");
+		pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
+		       priv->netdev->name, out_dev->name);
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				struct flow_action *flow_action,
 				struct mlx5e_tc_flow *flow,
@@ -3675,8 +3702,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 	if (!flow_action_has_entries(flow_action))
 		return -EINVAL;

-	if (!flow_action_hw_stats_types_check(flow_action, extack,
-					      FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT))
+	if (!flow_action_hw_stats_check(flow_action, extack,
+					FLOW_ACTION_HW_STATS_DELAYED_BIT))
 		return -EOPNOTSUPP;

 	flow_action_for_each(i, act, flow_action) {
@@ -3751,7 +3778,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 			struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
 			struct net_device *uplink_upper;
-			struct mlx5e_rep_priv *rep_priv;

 			if (is_duplicated_output_device(priv->netdev,
 							out_dev,
@@ -3787,21 +3813,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				return err;
 			}

-			/* Don't allow forwarding between uplink.
-			 *
-			 * Input vport was stored esw_attr->in_rep.
-			 * In LAG case, *priv* is the private data of
-			 * uplink which may be not the input vport.
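Distilled, the verify_uplink_forwarding() helper added above is a three-way decision; a standalone model (not the mlx5 API; -1 stands in for the driver's -EOPNOTSUPP):

#include <stdbool.h>

static int toy_check_uplink_fwd(bool in_is_uplink, bool out_is_uplink,
				bool same_uplink_dev, bool raw_traffic_cap)
{
	if (!(in_is_uplink && out_is_uplink))
		return 0;	/* not uplink-to-uplink: nothing to check */
	if (!raw_traffic_cap)
		return -1;	/* FW cannot terminate raw uplink traffic */
	if (!same_uplink_dev)
		return -1;	/* hairpin only back out of the same uplink */
	return 0;
}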
- */ - rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); - if (mlx5e_eswitch_uplink_rep(rep_priv->netdev) && - mlx5e_eswitch_uplink_rep(out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "devices are both uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink, can't offload forwarding\n", - priv->netdev->name, out_dev->name); - return -EOPNOTSUPP; - } + err = verify_uplink_forwarding(priv, flow, out_dev, extack); + if (err) + return err; if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -4510,7 +4524,7 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { @@ -4534,8 +4548,14 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = ma->common.extack; + if (!mlx5_esw_qos_enabled(esw)) { + NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); + return -EOPNOTSUPP; + } + if (ma->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 91b2aedcf52b..95532b258c2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -332,6 +332,7 @@ struct mlx5_termtbl_handle; bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, struct mlx5_flow_act *flow_act, struct mlx5_flow_spec *spec); @@ -393,6 +394,7 @@ enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ESW_ATTR_FLAG_HAIRPIN = BIT(3), }; struct mlx5_esw_flow_attr { @@ -453,6 +455,11 @@ int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u16 vlan_id, u32 flow_action); +static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) +{ + return esw->qos.enabled; +} + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { @@ -677,7 +684,7 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} -static struct mlx5_flow_handle * +static inline struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e2a906085a98..0b4b43ebae9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -300,6 +300,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; + bool hairpin = false; int j, i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) @@ -397,16 +398,21 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) { rule = 
mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, &flow_act, dest, i); - else + hairpin = true; + } else { rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + } if (IS_ERR(rule)) goto err_add_rule; else atomic64_inc(&esw->offloads.num_flows); + if (hairpin) + attr->flags |= MLX5_ESW_ATTR_FLAG_HAIRPIN; + return rule; err_add_rule: @@ -495,10 +501,12 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - /* unref the term table */ - for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { - if (attr->dests[i].termtbl) - mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + if (attr->flags & MLX5_ESW_ATTR_FLAG_HAIRPIN) { + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } } atomic64_dec(&esw->offloads.num_flows); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index 81421d4fb18d..1e275a8441de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -23,6 +23,8 @@ #define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) #define fdb_ignore_flow_level_supported(esw) \ (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) /* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated @@ -107,7 +109,8 @@ bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) { - return fdb_ignore_flow_level_supported(esw); + return mlx5_esw_chains_prios_supported(esw) && + fdb_ignore_flow_level_supported(esw); } u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) @@ -419,7 +422,8 @@ mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = next_fdb; - if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { + if (next_fdb == tc_end_fdb(esw) && + fdb_modify_header_fwd_to_table_supported(esw)) { act.modify_hdr = fdb_chain->miss_modify_hdr; act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } @@ -779,6 +783,13 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; } else { esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 269eddc3d38b..17a0d2bc102b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -3,6 +3,7 @@ 
#include <linux/mlx5/fs.h> #include "eswitch.h" +#include "fs_core.h" struct mlx5_termtbl_handle { struct hlist_node termtbl_hlist; @@ -28,6 +29,10 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); + if (dest->vport.pkt_reformat) + hash = jhash(dest->vport.pkt_reformat, + sizeof(*dest->vport.pkt_reformat), + hash); return hash; } @@ -37,11 +42,19 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, struct mlx5_flow_act *flow_act2, struct mlx5_flow_destination *dest2) { - return flow_act1->action != flow_act2->action || - dest1->vport.num != dest2->vport.num || - dest1->vport.vhca_id != dest2->vport.vhca_id || - memcmp(&flow_act1->vlan, &flow_act2->vlan, - sizeof(flow_act1->vlan)); + int ret; + + ret = flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); + if (ret) + return ret; + + return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? + memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, + sizeof(*dest1->vport.pkt_reformat)) : 0; } static int @@ -62,7 +75,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION; + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; ft_attr.prio = FDB_SLOW_PATH; ft_attr.max_fte = 1; ft_attr.autogroup.max_num_groups = 1; @@ -74,7 +88,6 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); - if (IS_ERR(tt->rule)) { esw_warn(dev, "Failed to create termination table rule\n"); goto add_flow_err; @@ -92,7 +105,8 @@ add_flow_err: static struct mlx5_termtbl_handle * mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, struct mlx5_flow_act *flow_act, - struct mlx5_flow_destination *dest) + struct mlx5_flow_destination *dest, + struct mlx5_esw_flow_attr *attr) { struct mlx5_termtbl_handle *tt; bool found = false; @@ -100,7 +114,6 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, int err; mutex_lock(&esw->offloads.termtbl_mutex); - hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); hash_for_each_possible(esw->offloads.termtbl_tbl, tt, termtbl_hlist, hash_key) { @@ -122,6 +135,7 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; tt->dest.vport.num = dest->vport.num; tt->dest.vport.vhca_id = dest->vport.vhca_id; + tt->dest.vport.flags = dest->vport.flags; memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); @@ -156,25 +170,44 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, } } +static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt) +{ + switch (rt->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + return true; + default: + return false; + } +} + static void mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, struct mlx5_flow_act *dst) { - if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) - return; - - src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - dst->action |= 
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
-	memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0]));
-	memset(&src->vlan[0], 0, sizeof(src->vlan[0]));
-
-	if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
-		return;
+	if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+		src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+		dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+		memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0]));
+		memset(&src->vlan[0], 0, sizeof(src->vlan[0]));
+
+		if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+			src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+			dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+			memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1]));
+			memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+		}
+	}

-	src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
-	dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
-	memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1]));
-	memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
+	if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
+	    mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) {
+		src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+		dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+		dst->pkt_reformat = src->pkt_reformat;
+		src->pkt_reformat = NULL;
+	}
 }

 static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
@@ -195,15 +228,27 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,

 bool
 mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
+			      struct mlx5_esw_flow_attr *attr,
 			      struct mlx5_flow_act *flow_act,
 			      struct mlx5_flow_spec *spec)
 {
-	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table))
+	int i;
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
+	    attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH ||
+	    !mlx5_eswitch_offload_is_uplink_port(esw, spec))
 		return false;

 	/* push vlan on RX */
-	return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) &&
-		mlx5_eswitch_offload_is_uplink_port(esw, spec);
+	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)
+		return true;
+
+	/* hairpin */
+	for (i = attr->split_count; i < attr->out_count; i++)
+		if (attr->dests[i].rep->vport == MLX5_VPORT_UPLINK)
+			return true;
+
+	return false;
 }

 struct mlx5_flow_handle *
@@ -233,7 +278,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		/* get the terminating table for the action list */
 		tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
-						     &dest[i]);
+						     &dest[i], attr);
 		if (IS_ERR(tt)) {
 			esw_warn(esw->dev, "Failed to create termination table\n");
 			goto revert_changes;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 1f2e6db743e1..c713bc22da7d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -1238,6 +1238,59 @@ err_append_allocated_mirror:
 }
 EXPORT_SYMBOL(mlxsw_afa_block_append_mirror);

+/* QoS Action
+ * ----------
+ * The QOS_ACTION is used for manipulating the QoS attributes of a packet. It
+ * can be used to change the DSCP, ECN, Color and Switch Priority of the packet.
+ * Note that the PCP field can be changed using the VLAN action.
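The MLXSW_ITEM32() declarations just below generate setters for bitfields inside big-endian 32-bit words of the action payload. A userspace model of what such a setter does, assuming the usual big-endian item layout (field widths here are below 32 bits):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl/ntohl */

static void toy_set_item32(uint8_t *payload, unsigned int byte_off,
			   unsigned int shift, unsigned int width,
			   uint32_t val)
{
	uint32_t word, mask = ((1u << width) - 1) << shift;

	memcpy(&word, payload + byte_off, sizeof(word));
	word = ntohl(word);
	word = (word & ~mask) | ((val << shift) & mask);
	word = htonl(word);
	memcpy(payload + byte_off, &word, sizeof(word));
}

/* e.g. for the two fields declared below:
 *   toy_set_item32(act, 0x08, 14, 2, 1);    - switch_prio_cmd = SET
 *   toy_set_item32(act, 0x08,  0, 4, prio); - switch_prio
 */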
+ */ + +#define MLXSW_AFA_QOS_CODE 0x06 +#define MLXSW_AFA_QOS_SIZE 1 + +enum mlxsw_afa_qos_cmd { + /* Do nothing */ + MLXSW_AFA_QOS_CMD_NOP, + /* Set a field */ + MLXSW_AFA_QOS_CMD_SET, +}; + +/* afa_qos_switch_prio_cmd + * Switch Priority command as per mlxsw_afa_qos_cmd. + */ +MLXSW_ITEM32(afa, qos, switch_prio_cmd, 0x08, 14, 2); + +/* afa_qos_switch_prio + * Switch Priority. + */ +MLXSW_ITEM32(afa, qos, switch_prio, 0x08, 0, 4); + +static inline void +mlxsw_afa_qos_switch_prio_pack(char *payload, + enum mlxsw_afa_qos_cmd prio_cmd, u8 prio) +{ + mlxsw_afa_qos_switch_prio_cmd_set(payload, prio_cmd); + mlxsw_afa_qos_switch_prio_set(payload, prio); +} + +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_QOS_CODE, + MLXSW_AFA_QOS_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append QOS action"); + return PTR_ERR(act); + } + mlxsw_afa_qos_switch_prio_pack(act, MLXSW_AFA_QOS_CMD_SET, + prio); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_qos_switch_prio); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 5f4c1e505136..2125d7d6bcb0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -62,6 +62,9 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, u16 vid, u8 pcp, u8 et, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_qos_switch_prio(struct mlxsw_afa_block *block, + u8 prio, + struct netlink_ext_ack *extack); int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, u32 counter_index); int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 6534184cb942..d62496ef299c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -18,6 +18,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_CQE_V1, MLXSW_RES_ID_CQE_V2, MLXSW_RES_ID_COUNTER_POOL_SIZE, + MLXSW_RES_ID_COUNTER_BANK_SIZE, MLXSW_RES_ID_MAX_SPAN, MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, @@ -75,6 +76,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_CQE_V1] = 0x2211, [MLXSW_RES_ID_CQE_V2] = 0x2212, [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, + [MLXSW_RES_ID_COUNTER_BANK_SIZE] = 0x2411, [MLXSW_RES_ID_MAX_SPAN] = 0x2420, [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 51709012593e..35d3a68ef4fd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5421,8 +5421,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; @@ -5440,8 
+5445,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) if (err) goto err_resources_span_register; + err = mlxsw_sp_counter_resources_register(mlxsw_core); + if (err) + goto err_resources_counter_register; + return 0; +err_resources_counter_register: err_resources_span_register: devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 81801c6fb941..bbd8bec8fee4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -46,6 +46,10 @@ #define MLXSW_SP_RESOURCE_NAME_SPAN "span_agents" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS "counters" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW "flow" +#define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" + enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD = 1, MLXSW_SP_RESOURCE_KVD_LINEAR, @@ -55,6 +59,9 @@ enum mlxsw_sp_resource_id { MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, MLXSW_SP_RESOURCE_SPAN, + MLXSW_SP_RESOURCE_COUNTERS, + MLXSW_SP_RESOURCE_COUNTERS_FLOW, + MLXSW_SP_RESOURCE_COUNTERS_RIF, }; struct mlxsw_sp_port; @@ -739,6 +746,9 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 action, u16 vid, u16 proto, u8 prio, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack); int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 6f8d5005ff36..01324d002680 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -638,6 +638,23 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, } } +int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 prio, struct netlink_ext_ack *extack) +{ + /* Even though both Linux and Spectrum switches support 16 priorities, + * spectrum_qdisc only processes the first eight priomap elements, and + * the DCB and PFC features are tied to 8 priorities as well. Therefore + * bounce attempts to prioritize packets to higher priorities. 
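The cap described above, in isolation: IEEE_8021QAZ_MAX_TCS is 8 in the kernel's dcbnl headers, so only switch priorities 0..7 are accepted (standalone sketch, names hypothetical):

#include <stdbool.h>
#include <stdint.h>

#define TOY_8021QAZ_MAX_TCS 8

static bool toy_switch_prio_supported(uint32_t prio)
{
	return prio < TOY_8021QAZ_MAX_TCS;
}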
+ */ + if (prio >= IEEE_8021QAZ_MAX_TCS) { + NL_SET_ERR_MSG_MOD(extack, "Only priorities 0..7 are supported"); + return -EINVAL; + } + return mlxsw_afa_block_append_qos_switch_prio(rulei->act_block, prio, + extack); +} + int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct netlink_ext_ack *extack) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c index 6a02ef9ec00e..0268f0a6662a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -7,91 +7,143 @@ #include "spectrum_cnt.h" -#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096 - struct mlxsw_sp_counter_sub_pool { + u64 size; unsigned int base_index; - unsigned int size; + enum mlxsw_res_id entry_size_res_id; + const char *resource_name; /* devlink resource name */ + u64 resource_id; /* devlink resource id */ unsigned int entry_size; unsigned int bank_count; + atomic_t active_entries_count; }; struct mlxsw_sp_counter_pool { - unsigned int pool_size; + u64 pool_size; unsigned long *usage; /* Usage bitmap */ spinlock_t counter_pool_lock; /* Protects counter pool allocations */ - struct mlxsw_sp_counter_sub_pool *sub_pools; + atomic_t active_entries_count; + unsigned int sub_pools_count; + struct mlxsw_sp_counter_sub_pool sub_pools[]; }; -static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { +static const struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_FLOW, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_FLOW, .bank_count = 6, }, [MLXSW_SP_COUNTER_SUB_POOL_RIF] = { + .entry_size_res_id = MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, + .resource_name = MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF, + .resource_id = MLXSW_SP_RESOURCE_COUNTERS_RIF, .bank_count = 2, } }; -static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp) +static u64 mlxsw_sp_counter_sub_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_sub_pool *sub_pool = priv; + + return atomic_read(&sub_pool->active_entries_count); +} + +static int mlxsw_sp_counter_sub_pools_init(struct mlxsw_sp *mlxsw_sp) { - unsigned int total_bank_config = 0; - unsigned int pool_size; + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int base_index = 0; + enum mlxsw_res_id res_id; + int err; int i; - pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); - /* Check config is valid, no bank over subscription */ - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) - total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count; - if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1) - return -EINVAL; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + res_id = sub_pool->entry_size_res_id; + + if (!mlxsw_core_res_valid(mlxsw_sp->core, res_id)) + return -EIO; + sub_pool->entry_size = mlxsw_core_res_get(mlxsw_sp->core, + res_id); + err = devlink_resource_size_get(devlink, + sub_pool->resource_id, + &sub_pool->size); + if (err) + goto err_resource_size_get; + + devlink_resource_occ_get_register(devlink, + sub_pool->resource_id, + mlxsw_sp_counter_sub_pool_occ_get, + sub_pool); + + sub_pool->base_index = base_index; + base_index += 
sub_pool->size; + atomic_set(&sub_pool->active_entries_count, 0); + } return 0; + +err_resource_size_get: + for (i--; i >= 0; i--) { + sub_pool = &pool->sub_pools[i]; + + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } + return err; } -static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_counter_sub_pools_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; - /* Prepare generic flow pool*/ - sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_PACKETS_BYTES)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_PACKETS_BYTES); - /* Prepare erif pool*/ - sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF]; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC)) - return -EIO; - sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - COUNTER_SIZE_ROUTER_BASIC); - return 0; + for (i = 0; i < pool->sub_pools_count; i++) { + sub_pool = &pool->sub_pools[i]; + + WARN_ON(atomic_read(&sub_pool->active_entries_count)); + devlink_resource_occ_get_unregister(devlink, + sub_pool->resource_id); + } +} + +static u64 mlxsw_sp_counter_pool_occ_get(void *priv) +{ + const struct mlxsw_sp_counter_pool *pool = priv; + + return atomic_read(&pool->active_entries_count); } int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) { + unsigned int sub_pools_count = ARRAY_SIZE(mlxsw_sp_counter_sub_pools); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_counter_sub_pool *sub_pool; struct mlxsw_sp_counter_pool *pool; - unsigned int base_index; unsigned int map_size; - int i; int err; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE)) - return -EIO; - - err = mlxsw_sp_counter_pool_validate(mlxsw_sp); - if (err) - return err; - - err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp); - if (err) - return err; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); + pool = kzalloc(struct_size(pool, sub_pools, sub_pools_count), + GFP_KERNEL); if (!pool) return -ENOMEM; + mlxsw_sp->counter_pool = pool; + memcpy(pool->sub_pools, mlxsw_sp_counter_sub_pools, + sub_pools_count * sizeof(*sub_pool)); + pool->sub_pools_count = sub_pools_count; spin_lock_init(&pool->counter_pool_lock); + atomic_set(&pool->active_entries_count, 0); + + err = devlink_resource_size_get(devlink, MLXSW_SP_RESOURCE_COUNTERS, + &pool->pool_size); + if (err) + goto err_pool_resource_size_get; + devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_COUNTERS, + mlxsw_sp_counter_pool_occ_get, pool); - pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); map_size = BITS_TO_LONGS(pool->pool_size) * sizeof(unsigned long); pool->usage = kzalloc(map_size, GFP_KERNEL); @@ -100,26 +152,18 @@ int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) goto err_usage_alloc; } - pool->sub_pools = mlxsw_sp_counter_sub_pools; - /* Allocation is based on bank count which should be - * specified for each sub pool statically. 
- */ - base_index = 0; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { - sub_pool = &pool->sub_pools[i]; - sub_pool->size = sub_pool->bank_count * - MLXSW_SP_COUNTER_POOL_BANK_SIZE; - sub_pool->base_index = base_index; - base_index += sub_pool->size; - /* The last bank can't be fully used */ - if (sub_pool->base_index + sub_pool->size > pool->pool_size) - sub_pool->size = pool->pool_size - sub_pool->base_index; - } + err = mlxsw_sp_counter_sub_pools_init(mlxsw_sp); + if (err) + goto err_sub_pools_init; - mlxsw_sp->counter_pool = pool; return 0; +err_sub_pools_init: + kfree(pool->usage); err_usage_alloc: + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); +err_pool_resource_size_get: kfree(pool); return err; } @@ -127,10 +171,15 @@ err_usage_alloc: void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_counter_sub_pools_fini(mlxsw_sp); WARN_ON(find_first_bit(pool->usage, pool->pool_size) != pool->pool_size); + WARN_ON(atomic_read(&pool->active_entries_count)); kfree(pool->usage); + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_COUNTERS); kfree(pool); } @@ -144,7 +193,7 @@ int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int stop_index; int i, err; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; stop_index = sub_pool->base_index + sub_pool->size; entry_index = sub_pool->base_index; @@ -166,6 +215,8 @@ int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, spin_unlock(&pool->counter_pool_lock); *p_counter_index = entry_index; + atomic_add(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_add(sub_pool->entry_size, &pool->active_entries_count); return 0; err_alloc: @@ -183,9 +234,77 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(counter_index >= pool->pool_size)) return; - sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + sub_pool = &pool->sub_pools[sub_pool_id]; spin_lock(&pool->counter_pool_lock); for (i = 0; i < sub_pool->entry_size; i++) __clear_bit(counter_index + i, pool->usage); spin_unlock(&pool->counter_pool_lock); + atomic_sub(sub_pool->entry_size, &sub_pool->active_entries_count); + atomic_sub(sub_pool->entry_size, &pool->active_entries_count); +} + +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core) +{ + static struct devlink_resource_size_params size_params; + struct devlink *devlink = priv_to_devlink(mlxsw_core); + const struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int total_bank_config; + u64 sub_pool_size; + u64 base_index; + u64 pool_size; + u64 bank_size; + int err; + int i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_POOL_SIZE) || + !MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_BANK_SIZE)) + return -EIO; + + pool_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_POOL_SIZE); + bank_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_BANK_SIZE); + + devlink_resource_size_params_init(&size_params, pool_size, + pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + MLXSW_SP_RESOURCE_NAME_COUNTERS, + pool_size, + MLXSW_SP_RESOURCE_COUNTERS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); + if (err) + return err; + + /* Allocation is based on bank count which should be + * specified for each sub pool statically. 
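The resource registration loop that follows applies the same carving rule the old init-time code used: each sub-pool gets bank_count banks, and the last sub-pool is clamped to whatever remains of the global pool. Standalone (names hypothetical):

#include <stdint.h>

static uint64_t toy_carve_sub_pool(uint64_t *base_index, uint32_t bank_count,
				   uint64_t bank_size, uint64_t pool_size)
{
	uint64_t size = (uint64_t)bank_count * bank_size;

	/* The last bank can't always be fully used. */
	if (*base_index + size > pool_size)
		size = pool_size - *base_index;
	*base_index += size;
	return size;
}

/* e.g. with the {6, 2} bank split used by the flow and rif sub-pools:
 *
 *	uint64_t base = 0;
 *	uint64_t flow = toy_carve_sub_pool(&base, 6, bank_size, pool_size);
 *	uint64_t rif  = toy_carve_sub_pool(&base, 2, bank_size, pool_size);
 */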
+ */ + total_bank_config = 0; + base_index = 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { + sub_pool = &mlxsw_sp_counter_sub_pools[i]; + sub_pool_size = sub_pool->bank_count * bank_size; + /* The last bank can't be fully used */ + if (base_index + sub_pool_size > pool_size) + sub_pool_size = pool_size - base_index; + base_index += sub_pool_size; + + devlink_resource_size_params_init(&size_params, sub_pool_size, + sub_pool_size, bank_size, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, + sub_pool->resource_name, + sub_pool_size, + sub_pool->resource_id, + MLXSW_SP_RESOURCE_COUNTERS, + &size_params); + if (err) + return err; + total_bank_config += sub_pool->bank_count; + } + + /* Check config is valid, no bank over subscription */ + if (WARN_ON(total_bank_config > pool_size / bank_size + 1)) + return -EINVAL; + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h index 81465e267b10..a68d931090dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -4,6 +4,7 @@ #ifndef _MLXSW_SPECTRUM_CNT_H #define _MLXSW_SPECTRUM_CNT_H +#include "core.h" #include "spectrum.h" enum mlxsw_sp_counter_sub_pool_id { @@ -19,5 +20,6 @@ void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_counter_resources_register(struct mlxsw_core *mlxsw_core); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 88aa554415df..1cb023955d8f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -26,17 +26,17 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (!flow_action_has_entries(flow_action)) return 0; - if (!flow_action_mixed_hw_stats_types_check(flow_action, extack)) + if (!flow_action_mixed_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; act = flow_action_first_entry_get(flow_action); - if (act->hw_stats_type == FLOW_ACTION_HW_STATS_TYPE_ANY || - act->hw_stats_type == FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE) { + if (act->hw_stats_type == FLOW_ACTION_HW_STATS_ANY || + act->hw_stats_type == FLOW_ACTION_HW_STATS_IMMEDIATE) { /* Count action is inserted first */ err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); if (err) return err; - } else if (act->hw_stats_type != FLOW_ACTION_HW_STATS_TYPE_DISABLED) { + } else if (act->hw_stats_type != FLOW_ACTION_HW_STATS_DISABLED) { NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); return -EOPNOTSUPP; } @@ -154,6 +154,10 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, act->id, vid, proto, prio, extack); } + case FLOW_ACTION_PRIORITY: + return mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei, + act->priority, + extack); default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 6d84173373c7..873a9944fbfb 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -17,8 +17,8 @@ static int ocelot_flower_parse_action(struct flow_cls_offload *f, if (!flow_offload_has_one_action(&f->rule->action)) return 
-EOPNOTSUPP; - if (!flow_action_basic_hw_stats_types_check(&f->rule->action, - f->common.extack)) + if (!flow_action_basic_hw_stats_check(&f->rule->action, + f->common.extack)) return -EOPNOTSUPP; flow_action_for_each(i, a, &f->rule->action) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 4aa7346cb040..1c76e1592ca2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -1207,8 +1207,8 @@ int nfp_flower_compile_action(struct nfp_app *app, bool pkt_host = false; u32 csum_updated = 0; - if (!flow_action_basic_hw_stats_types_check(&flow->rule->action, - extack)) + if (!flow_action_hw_stats_check(&flow->rule->action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) return -EOPNOTSUPP; memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index ed14164468a1..273c889faaad 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -77,12 +77,16 @@ int ionic_devlink_register(struct ionic *ionic) return err; } + /* don't register the mgmt_nic as a port */ + if (ionic->is_mgmt_nic) + return 0; + devlink_port_attrs_set(&ionic->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, 0, false, 0, NULL, 0); err = devlink_port_register(dl, &ionic->dl_port, 0); if (err) dev_err(ionic->dev, "devlink_port_register failed: %d\n", err); - else if (!ionic->is_mgmt_nic) + else devlink_port_type_eth_set(&ionic->dl_port, ionic->master_lif->netdev); @@ -93,6 +97,7 @@ void ionic_devlink_unregister(struct ionic *ionic) { struct devlink *dl = priv_to_devlink(ionic); - devlink_port_unregister(&ionic->dl_port); + if (ionic->dl_port.registered) + devlink_port_unregister(&ionic->dl_port); devlink_unregister(dl); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index a233716eac29..6996229facfd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -3,6 +3,7 @@ #include <linux/module.h> #include <linux/netdevice.h> +#include <linux/sfp.h> #include "ionic.h" #include "ionic_bus.h" @@ -677,23 +678,27 @@ static int ionic_get_module_info(struct net_device *netdev, struct ionic_lif *lif = netdev_priv(netdev); struct ionic_dev *idev = &lif->ionic->idev; struct ionic_xcvr_status *xcvr; + struct sfp_eeprom_base *sfp; xcvr = &idev->port_info->status.xcvr; + sfp = (struct sfp_eeprom_base *) xcvr->sprom; /* report the module data type and length */ - switch (xcvr->sprom[0]) { - case 0x03: /* SFP */ + switch (sfp->phys_id) { + case SFF8024_ID_SFP: modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; break; - case 0x0D: /* QSFP */ - case 0x11: /* QSFP28 */ + case SFF8024_ID_QSFP_8436_8636: + case SFF8024_ID_QSFP28_8636: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; default: netdev_info(netdev, "unknown xcvr type 0x%02x\n", xcvr->sprom[0]); + modinfo->type = 0; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; break; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index b903016193df..12e3823b0bc1 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2066,9 +2066,11 @@ static void 
ionic_lif_deinit(struct ionic_lif *lif) clear_bit(IONIC_LIF_F_INITED, lif->state); ionic_rx_filters_deinit(lif); - ionic_lif_rss_deinit(lif); + if (lif->netdev->features & NETIF_F_RXHASH) + ionic_lif_rss_deinit(lif); napi_disable(&lif->adminqcq->napi); + netif_napi_del(&lif->adminqcq->napi); ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index e4a76e66f542..c5e3d7639f7e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -58,6 +58,8 @@ static const char *ionic_error_to_str(enum ionic_status_code code) return "IONIC_RC_BAD_ADDR"; case IONIC_RC_DEV_CMD: return "IONIC_RC_DEV_CMD"; + case IONIC_RC_ENOSUPP: + return "IONIC_RC_ENOSUPP"; case IONIC_RC_ERROR: return "IONIC_RC_ERROR"; case IONIC_RC_ERDMA: @@ -76,6 +78,7 @@ static int ionic_error_to_errno(enum ionic_status_code code) case IONIC_RC_EQTYPE: case IONIC_RC_EQID: case IONIC_RC_EINVAL: + case IONIC_RC_ENOSUPP: return -EINVAL; case IONIC_RC_EPERM: return -EPERM; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 4f7676f4e624..812c7766e096 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1566,7 +1566,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev, static int qede_selftest_receive_traffic(struct qede_dev *edev) { - u16 hw_comp_cons, sw_comp_cons, sw_rx_index, len; + u16 sw_rx_index, len; struct eth_fast_path_rx_reg_cqe *fp_cqe; struct qede_rx_queue *rxq = NULL; struct sw_rx_data *sw_rx_data; @@ -1596,17 +1596,6 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) continue; } - hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr); - sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring); - - /* Memory barrier to prevent the CPU from doing speculative - * reads of CQE/BD before reading hw_comp_cons. If the CQE is - * read before it is written by FW, then FW writes CQE and SB, - * and then the CPU reads the hw_comp_cons, it will use an old - * CQE. - */ - rmb(); - /* Get the CQE from the completion ring */ cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring); diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 6505f7e2d1db..fe72bb6c9455 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1757,7 +1757,7 @@ static int qede_parse_actions(struct qede_dev *edev, return -EINVAL; } - if (!flow_action_basic_hw_stats_types_check(flow_action, extack)) + if (!flow_action_basic_hw_stats_check(flow_action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, flow_action) { diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index b0d76bc19673..1799ff9a45d9 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -200,11 +200,11 @@ void efx_link_status_changed(struct efx_nic *efx) unsigned int efx_xdp_max_mtu(struct efx_nic *efx) { /* The maximum MTU that we can fit in a single page, allowing for - * framing, overhead and XDP headroom. + * framing, overhead and XDP headroom + tailroom. 
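Two notes on the sfc buffer budget reworked here. The new tailroom term exists because XDP_REDIRECT hands the buffer on as an xdp_frame, whose skb_shared_info lands at the end of the buffer; without reserving SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) a redirected frame could overrun the page. And the headroom shrinks from the generic 256-byte XDP_PACKET_HEADROOM to 128 so that two roughly-1500-byte buffers still fit in one 4 KiB page. A stand-alone model of that arithmetic, with all sizes assumed for illustration:

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

static int bufs_per_page(int headroom)
{
        int tailroom = 320;   /* ~SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), assumed */
        int dma_len  = 1536;  /* ~1500-byte MTU frame plus prefix/padding, assumed */
        int step     = ALIGN_UP(dma_len + 2 + headroom + tailroom, 4);

        return (4096 - 8) / step;       /* 8 bytes of per-page state, assumed */
}

int main(void)
{
        printf("256B headroom: %d buffers/page\n", bufs_per_page(256));  /* 1 */
        printf("128B headroom: %d buffers/page\n", bufs_per_page(128));  /* 2 */
        return 0;
}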
*/ int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + efx->rx_prefix_size + efx->type->rx_buffer_padding + - efx->rx_ip_align + XDP_PACKET_HEADROOM; + efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM; return PAGE_SIZE - overhead; } @@ -302,8 +302,9 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_dma_len = (efx->rx_prefix_size + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + efx->type->rx_buffer_padding); - rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + - efx->rx_ip_align + efx->rx_dma_len); + rx_buf_len = (sizeof(struct efx_rx_page_state) + EFX_XDP_HEADROOM + + efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM); + if (rx_buf_len <= PAGE_SIZE) { efx->rx_scatter = efx->type->always_rx_scatter; efx->rx_buffer_order = 0; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 9a637cd67f43..04e88d05e8ff 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -232,9 +232,6 @@ static int efx_ethtool_set_coalesce(struct net_device *net_dev, bool adaptive, rx_may_override_tx; int rc; - if (coalesce->use_adaptive_tx_coalesce) - return -EINVAL; - efx_get_irq_moderation(efx, &tx_usecs, &rx_usecs, &adaptive); if (coalesce->rx_coalesce_usecs != rx_usecs) @@ -1138,6 +1135,9 @@ static int efx_ethtool_set_fecparam(struct net_device *net_dev, } const struct ethtool_ops efx_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USECS_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = efx_ethtool_get_drvinfo, .get_regs_len = efx_ethtool_get_regs_len, .get_regs = efx_ethtool_get_regs, diff --git a/drivers/net/ethernet/sfc/falcon/ethtool.c b/drivers/net/ethernet/sfc/falcon/ethtool.c index 08bd6a321918..db90d94e24c9 100644 --- a/drivers/net/ethernet/sfc/falcon/ethtool.c +++ b/drivers/net/ethernet/sfc/falcon/ethtool.c @@ -603,9 +603,6 @@ static int ef4_ethtool_set_coalesce(struct net_device *net_dev, bool adaptive, rx_may_override_tx; int rc; - if (coalesce->use_adaptive_tx_coalesce) - return -EINVAL; - ef4_get_irq_moderation(efx, &tx_usecs, &rx_usecs, &adaptive); if (coalesce->rx_coalesce_usecs != rx_usecs) @@ -1311,6 +1308,9 @@ static int ef4_ethtool_get_module_info(struct net_device *net_dev, } const struct ethtool_ops ef4_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USECS_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = ef4_ethtool_get_drvinfo, .get_regs_len = ef4_ethtool_get_regs_len, .get_regs = ef4_ethtool_get_regs, diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index b836315bac87..b084e623b5f4 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -91,6 +91,12 @@ #define EFX_RX_BUF_ALIGNMENT 4 #endif +/* Non-standard XDP_PACKET_HEADROOM and tailroom to satisfy XDP_REDIRECT and + * still fit two standard MTU size packets into a single 4K page. + */ +#define EFX_XDP_HEADROOM 128 +#define EFX_XDP_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + /* Forward declare Precision Time Protocol (PTP) support structure. 
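The ethtool changes above belong to a cleanup visible across several drivers in this series: once an ethtool_ops instance declares supported_coalesce_params, the ethtool core rejects any other non-zero coalesce field with -EOPNOTSUPP before the driver callback runs, so hand-rolled "unsupported parameter" checks (like the use_adaptive_tx_coalesce test deleted here) can go. A minimal sketch with invented driver names:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

struct foo_priv {                       /* hypothetical driver state */
        u32 rx_usecs;
        u32 rx_frames;
};

static int foo_set_coalesce(struct net_device *dev,
                            struct ethtool_coalesce *ec)
{
        struct foo_priv *priv = netdev_priv(dev);

        /* only the fields declared below can be non-zero here */
        priv->rx_usecs  = ec->rx_coalesce_usecs;
        priv->rx_frames = ec->rx_max_coalesced_frames;
        return 0;
}

static const struct ethtool_ops foo_ethtool_ops = {
        .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
                                     ETHTOOL_COALESCE_RX_MAX_FRAMES,
        .set_coalesce = foo_set_coalesce,
};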
*/ struct efx_ptp_data; struct hwtstamp_config; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index a2042f16babc..260352d97d9d 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -302,7 +302,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, efx->rx_prefix_size); xdp.data = *ehp; - xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; + xdp.data_hard_start = xdp.data - EFX_XDP_HEADROOM; /* No support yet for XDP metadata */ xdp_set_data_meta_invalid(&xdp); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index ee8beb87bdc1..e10c23833515 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -412,10 +412,10 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) index = rx_queue->added_count & rx_queue->ptr_mask; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->dma_addr = dma_addr + efx->rx_ip_align + - XDP_PACKET_HEADROOM; + EFX_XDP_HEADROOM; rx_buf->page = page; rx_buf->page_offset = page_offset + efx->rx_ip_align + - XDP_PACKET_HEADROOM; + EFX_XDP_HEADROOM; rx_buf->len = efx->rx_dma_len; rx_buf->flags = 0; ++rx_queue->added_count; @@ -433,7 +433,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) void efx_rx_config_page_split(struct efx_nic *efx) { efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + - XDP_PACKET_HEADROOM, + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM, EFX_RX_BUF_ALIGNMENT); efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 58b9b7ce7195..a5a0fb60193a 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -589,6 +589,8 @@ static void netsec_et_set_msglevel(struct net_device *dev, u32 datum) } static const struct ethtool_ops netsec_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = netsec_et_get_drvinfo, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 9bdbf589d93f..386663208c23 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -34,6 +34,11 @@ #define DWMAC_CORE_5_00 0x50 #define DWMAC_CORE_5_10 0x51 #define DWXGMAC_CORE_2_10 0x21 +#define DWXLGMAC_CORE_2_00 0x20 + +/* Device ID */ +#define DWXGMAC_ID 0x76 +#define DWXLGMAC_ID 0x27 #define STMMAC_CHAN0 0 /* Always supported and default for all chips */ @@ -426,6 +431,12 @@ struct mac_link { u32 speed5000; u32 speed10000; } xgmii; + struct { + u32 speed25000; + u32 speed40000; + u32 speed50000; + u32 speed100000; + } xlgmii; }; struct mii_regs { @@ -459,6 +470,7 @@ struct mac_device_info { unsigned int pcs; unsigned int pmt; unsigned int ps; + unsigned int xlgmac; }; struct stmmac_rx_routing { @@ -470,6 +482,7 @@ int dwmac100_setup(struct stmmac_priv *priv); int dwmac1000_setup(struct stmmac_priv *priv); int dwmac4_setup(struct stmmac_priv *priv); int dwxgmac2_setup(struct stmmac_priv *priv); +int dwxlgmac2_setup(struct stmmac_priv *priv); void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 688d36095333..cb87d31a99df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -16,19 +16,14 @@ int dwmac_dma_reset(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_BUS_MODE); - int err; /* DMA SW reset */ value |= DMA_BUS_MODE_SFT_RESET; writel(value, ioaddr + DMA_BUS_MODE); - err = readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, + return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, !(value & DMA_BUS_MODE_SFT_RESET), 10000, 100000); - if (err) - return -EBUSY; - - return 0; } /* CSR1 enables the transmit DMA to check for new descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 67b754a56288..0e4575f7bedb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -9,6 +9,7 @@ #include <linux/iopoll.h> #include "stmmac.h" #include "stmmac_ptp.h" +#include "dwxlgmac2.h" #include "dwxgmac2.h" static void dwxgmac2_core_init(struct mac_device_info *hw, @@ -1485,6 +1486,67 @@ const struct stmmac_ops dwxgmac210_ops = { .fpe_configure = dwxgmac3_fpe_configure, }; +static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode, + u32 queue) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value; + + value = readl(ioaddr + XLGMAC_RXQ_ENABLE_CTRL0) & ~XGMAC_RXQEN(queue); + if (mode == MTL_QUEUE_AVB) + value |= 0x1 << XGMAC_RXQEN_SHIFT(queue); + else if (mode == MTL_QUEUE_DCB) + value |= 0x2 << XGMAC_RXQEN_SHIFT(queue); + writel(value, ioaddr + XLGMAC_RXQ_ENABLE_CTRL0); +} + +const struct stmmac_ops dwxlgmac2_ops = { + .core_init = dwxgmac2_core_init, + .set_mac = dwxgmac2_set_mac, + .rx_ipc = dwxgmac2_rx_ipc, + .rx_queue_enable = dwxlgmac2_rx_queue_enable, + .rx_queue_prio = dwxgmac2_rx_queue_prio, + .tx_queue_prio = dwxgmac2_tx_queue_prio, + .rx_queue_routing = NULL, + .prog_mtl_rx_algorithms = dwxgmac2_prog_mtl_rx_algorithms, + .prog_mtl_tx_algorithms = dwxgmac2_prog_mtl_tx_algorithms, + .set_mtl_tx_queue_weight = dwxgmac2_set_mtl_tx_queue_weight, + .map_mtl_to_dma = dwxgmac2_map_mtl_to_dma, + .config_cbs = dwxgmac2_config_cbs, + .dump_regs = dwxgmac2_dump_regs, + .host_irq_status = dwxgmac2_host_irq_status, + .host_mtl_irq_status = dwxgmac2_host_mtl_irq_status, + .flow_ctrl = dwxgmac2_flow_ctrl, + .pmt = dwxgmac2_pmt, + .set_umac_addr = dwxgmac2_set_umac_addr, + .get_umac_addr = dwxgmac2_get_umac_addr, + .set_eee_mode = dwxgmac2_set_eee_mode, + .reset_eee_mode = dwxgmac2_reset_eee_mode, + .set_eee_timer = dwxgmac2_set_eee_timer, + .set_eee_pls = dwxgmac2_set_eee_pls, + .pcs_ctrl_ane = NULL, + .pcs_rane = NULL, + .pcs_get_adv_lp = NULL, + .debug = NULL, + .set_filter = dwxgmac2_set_filter, + .safety_feat_config = dwxgmac3_safety_feat_config, + .safety_feat_irq_status = dwxgmac3_safety_feat_irq_status, + .safety_feat_dump = dwxgmac3_safety_feat_dump, + .set_mac_loopback = dwxgmac2_set_mac_loopback, + .rss_configure = dwxgmac2_rss_configure, + .update_vlan_hash = dwxgmac2_update_vlan_hash, + .rxp_config = dwxgmac3_rxp_config, + .get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp, + .flex_pps_config = dwxgmac2_flex_pps_config, + .sarc_configure = dwxgmac2_sarc_configure, + .enable_vlan = dwxgmac2_enable_vlan, + .config_l3_filter = dwxgmac2_config_l3_filter, + .config_l4_filter = dwxgmac2_config_l4_filter, + .set_arp_offload = dwxgmac2_set_arp_offload, + .est_configure = dwxgmac3_est_configure, + .fpe_configure = 
dwxgmac3_fpe_configure, +}; + int dwxgmac2_setup(struct stmmac_priv *priv) { struct mac_device_info *mac = priv->hw; @@ -1521,3 +1583,40 @@ int dwxgmac2_setup(struct stmmac_priv *priv) return 0; } + +int dwxlgmac2_setup(struct stmmac_priv *priv) +{ + struct mac_device_info *mac = priv->hw; + + dev_info(priv->device, "\tXLGMAC\n"); + + priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; + mac->mcast_bits_log2 = 0; + + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + + mac->link.duplex = 0; + mac->link.speed1000 = XLGMAC_CONFIG_SS_1000; + mac->link.speed2500 = XLGMAC_CONFIG_SS_2500; + mac->link.xgmii.speed10000 = XLGMAC_CONFIG_SS_10G; + mac->link.xlgmii.speed25000 = XLGMAC_CONFIG_SS_25G; + mac->link.xlgmii.speed40000 = XLGMAC_CONFIG_SS_40G; + mac->link.xlgmii.speed50000 = XLGMAC_CONFIG_SS_50G; + mac->link.xlgmii.speed100000 = XLGMAC_CONFIG_SS_100G; + mac->link.speed_mask = XLGMAC_CONFIG_SS; + + mac->mii.addr = XGMAC_MDIO_ADDR; + mac->mii.data = XGMAC_MDIO_DATA; + mac->mii.addr_shift = 16; + mac->mii.addr_mask = GENMASK(20, 16); + mac->mii.reg_shift = 0; + mac->mii.reg_mask = GENMASK(15, 0); + mac->mii.clk_csr_shift = 19; + mac->mii.clk_csr_mask = GENMASK(21, 19); + + return 0; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxlgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxlgmac2.h new file mode 100644 index 000000000000..726090d49221 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwxlgmac2.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare XLGMAC definitions. 
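The per-speed values installed into mac->link.xlgmii above are pre-shifted encodings of a single 3-bit speed-select field (bits 30:28, per the XLGMAC_CONFIG_SS definitions in this new header); the link-up path further down in this series simply ORs the value for the negotiated speed into the MAC configuration register. A small stand-alone model of that field update, with the 25G code taken from the header:

#include <stdint.h>
#include <stdio.h>

#define SS_SHIFT 28
#define SS_MASK  (0x7u << SS_SHIFT)

/* clear the old speed-select code and install a new one */
static uint32_t set_speed(uint32_t ctrl, uint32_t ss_code)
{
        return (ctrl & ~SS_MASK) | (ss_code << SS_SHIFT);
}

int main(void)
{
        uint32_t ctrl = set_speed(0x70000000, 0x1);     /* 0x1 = 25G */

        printf("ctrl = 0x%08x\n", ctrl);                /* 0x10000000 */
        return 0;
}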
+ */ + +#ifndef __STMMAC_DWXLGMAC2_H__ +#define __STMMAC_DWXLGMAC2_H__ + +/* MAC Registers */ +#define XLGMAC_CONFIG_SS GENMASK(30, 28) +#define XLGMAC_CONFIG_SS_SHIFT 28 +#define XLGMAC_CONFIG_SS_40G (0x0 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_25G (0x1 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_50G (0x2 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_100G (0x3 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_10G (0x4 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_2500 (0x6 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_CONFIG_SS_1000 (0x7 << XLGMAC_CONFIG_SS_SHIFT) +#define XLGMAC_RXQ_ENABLE_CTRL0 0x00000140 + +#endif /* __STMMAC_DWXLGMAC2_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 3af2e5015245..bb7114f970f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -23,6 +23,18 @@ static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) return reg & GENMASK(7, 0); } +static u32 stmmac_get_dev_id(struct stmmac_priv *priv, u32 id_reg) +{ + u32 reg = readl(priv->ioaddr + id_reg); + + if (!reg) { + dev_info(priv->device, "Version ID not available\n"); + return 0x0; + } + + return (reg & GENMASK(15, 8)) >> 8; +} + static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) { struct mac_device_info *mac = priv->hw; @@ -69,11 +81,18 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv) return 0; } +static int stmmac_dwxlgmac_quirks(struct stmmac_priv *priv) +{ + priv->hw->xlgmac = true; + return 0; +} + static const struct stmmac_hwif_entry { bool gmac; bool gmac4; bool xgmac; u32 min_id; + u32 dev_id; const struct stmmac_regs_off regs; const void *desc; const void *dma; @@ -199,6 +218,7 @@ static const struct stmmac_hwif_entry { .gmac4 = false, .xgmac = true, .min_id = DWXGMAC_CORE_2_10, + .dev_id = DWXGMAC_ID, .regs = { .ptp_off = PTP_XGMAC_OFFSET, .mmc_off = MMC_XGMAC_OFFSET, @@ -212,6 +232,25 @@ static const struct stmmac_hwif_entry { .mmc = &dwxgmac_mmc_ops, .setup = dwxgmac2_setup, .quirks = NULL, + }, { + .gmac = false, + .gmac4 = false, + .xgmac = true, + .min_id = DWXLGMAC_CORE_2_00, + .dev_id = DWXLGMAC_ID, + .regs = { + .ptp_off = PTP_XGMAC_OFFSET, + .mmc_off = MMC_XGMAC_OFFSET, + }, + .desc = &dwxgmac210_desc_ops, + .dma = &dwxgmac210_dma_ops, + .mac = &dwxlgmac2_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .tc = &dwmac510_tc_ops, + .mmc = &dwxgmac_mmc_ops, + .setup = dwxlgmac2_setup, + .quirks = stmmac_dwxlgmac_quirks, }, }; @@ -223,13 +262,15 @@ int stmmac_hwif_init(struct stmmac_priv *priv) const struct stmmac_hwif_entry *entry; struct mac_device_info *mac; bool needs_setup = true; + u32 id, dev_id = 0; int i, ret; - u32 id; if (needs_gmac) { id = stmmac_get_id(priv, GMAC_VERSION); } else if (needs_gmac4 || needs_xgmac) { id = stmmac_get_id(priv, GMAC4_VERSION); + if (needs_xgmac) + dev_id = stmmac_get_dev_id(priv, GMAC4_VERSION); } else { id = 0; } @@ -267,6 +308,8 @@ int stmmac_hwif_init(struct stmmac_priv *priv) /* Use synopsys_id var because some setups can override this */ if (priv->synopsys_id < entry->min_id) continue; + if (needs_xgmac && (dev_id ^ entry->dev_id)) + continue; /* Only use generic HW helpers if needed */ mac->desc = mac->desc ? 
: entry->desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index c71dd99c8abf..fc350149ba34 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -605,6 +605,7 @@ extern const struct stmmac_dma_ops dwmac410_dma_ops; extern const struct stmmac_ops dwmac510_ops; extern const struct stmmac_tc_ops dwmac510_tc_ops; extern const struct stmmac_ops dwxgmac210_ops; +extern const struct stmmac_ops dwxlgmac2_ops; extern const struct stmmac_dma_ops dwxgmac210_dma_ops; extern const struct stmmac_desc_ops dwxgmac210_desc_ops; extern const struct stmmac_mmc_ops dwmac_mmc_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f26699d9a050..0e8c80f23557 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -849,6 +849,38 @@ static void stmmac_validate(struct phylink_config *config, phylink_set(mac_supported, 10000baseKX4_Full); phylink_set(mac_supported, 10000baseKR_Full); } + if (!max_speed || (max_speed >= 25000)) { + phylink_set(mac_supported, 25000baseCR_Full); + phylink_set(mac_supported, 25000baseKR_Full); + phylink_set(mac_supported, 25000baseSR_Full); + } + if (!max_speed || (max_speed >= 40000)) { + phylink_set(mac_supported, 40000baseKR4_Full); + phylink_set(mac_supported, 40000baseCR4_Full); + phylink_set(mac_supported, 40000baseSR4_Full); + phylink_set(mac_supported, 40000baseLR4_Full); + } + if (!max_speed || (max_speed >= 50000)) { + phylink_set(mac_supported, 50000baseCR2_Full); + phylink_set(mac_supported, 50000baseKR2_Full); + phylink_set(mac_supported, 50000baseSR2_Full); + phylink_set(mac_supported, 50000baseKR_Full); + phylink_set(mac_supported, 50000baseSR_Full); + phylink_set(mac_supported, 50000baseCR_Full); + phylink_set(mac_supported, 50000baseLR_ER_FR_Full); + phylink_set(mac_supported, 50000baseDR_Full); + } + if (!max_speed || (max_speed >= 100000)) { + phylink_set(mac_supported, 100000baseKR4_Full); + phylink_set(mac_supported, 100000baseSR4_Full); + phylink_set(mac_supported, 100000baseCR4_Full); + phylink_set(mac_supported, 100000baseLR4_ER4_Full); + phylink_set(mac_supported, 100000baseKR2_Full); + phylink_set(mac_supported, 100000baseSR2_Full); + phylink_set(mac_supported, 100000baseCR2_Full); + phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full); + phylink_set(mac_supported, 100000baseDR2_Full); + } } /* Half-Duplex can only work with single queue */ @@ -929,6 +961,32 @@ static void stmmac_mac_link_up(struct phylink_config *config, default: return; } + } else if (interface == PHY_INTERFACE_MODE_XLGMII) { + switch (speed) { + case SPEED_100000: + ctrl |= priv->hw->link.xlgmii.speed100000; + break; + case SPEED_50000: + ctrl |= priv->hw->link.xlgmii.speed50000; + break; + case SPEED_40000: + ctrl |= priv->hw->link.xlgmii.speed40000; + break; + case SPEED_25000: + ctrl |= priv->hw->link.xlgmii.speed25000; + break; + case SPEED_10000: + ctrl |= priv->hw->link.xgmii.speed10000; + break; + case SPEED_2500: + ctrl |= priv->hw->link.speed2500; + break; + case SPEED_1000: + ctrl |= priv->hw->link.speed1000; + break; + default: + return; + } } else { switch (speed) { case SPEED_2500: diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index 07dbe4f5456e..63d6c85a59e3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -1387,7 +1387,7 @@ static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src, cls->rule = rule; rule->action.entries[0].id = FLOW_ACTION_DROP; - rule->action.entries[0].hw_stats_type = FLOW_ACTION_HW_STATS_TYPE_ANY; + rule->action.entries[0].hw_stats_type = FLOW_ACTION_HW_STATS_ANY; rule->action.num_entries = 1; attr.dst = priv->dev->dev_addr; @@ -1516,7 +1516,7 @@ static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src, cls->rule = rule; rule->action.entries[0].id = FLOW_ACTION_DROP; - rule->action.entries[0].hw_stats_type = FLOW_ACTION_HW_STATS_TYPE_ANY; + rule->action.entries[0].hw_stats_type = FLOW_ACTION_HW_STATS_ANY; rule->action.num_entries = 1; attr.dst = priv->dev->dev_addr; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index a0e6118444b0..3d747846f482 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -376,7 +376,7 @@ static int tc_parse_flow_actions(struct stmmac_priv *priv, if (!flow_action_has_entries(action)) return -EINVAL; - if (!flow_action_basic_hw_stats_types_check(action, extack)) + if (!flow_action_basic_hw_stats_check(action, extack)) return -EOPNOTSUPP; flow_action_for_each(i, act, action) { diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c index fde722136869..bc198eadfcab 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c @@ -151,7 +151,6 @@ static int xlgmac_ethtool_get_coalesce(struct net_device *netdev, { struct xlgmac_pdata *pdata = netdev_priv(netdev); - memset(ec, 0, sizeof(struct ethtool_coalesce)); ec->rx_coalesce_usecs = pdata->rx_usecs; ec->rx_max_coalesced_frames = pdata->rx_frames; ec->tx_max_coalesced_frames = pdata->tx_frames; @@ -167,20 +166,6 @@ static int xlgmac_ethtool_set_coalesce(struct net_device *netdev, unsigned int rx_frames, rx_riwt, rx_usecs; unsigned int tx_frames; - /* Check for not supported parameters */ - if ((ec->rx_coalesce_usecs_irq) || (ec->rx_max_coalesced_frames_irq) || - (ec->tx_coalesce_usecs) || (ec->tx_coalesce_usecs_high) || - (ec->tx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) || - (ec->stats_block_coalesce_usecs) || (ec->pkt_rate_low) || - (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) || - (ec->rx_max_coalesced_frames_low) || (ec->rx_coalesce_usecs_low) || - (ec->tx_coalesce_usecs_low) || (ec->tx_max_coalesced_frames_low) || - (ec->pkt_rate_high) || (ec->rx_coalesce_usecs_high) || - (ec->rx_max_coalesced_frames_high) || - (ec->tx_max_coalesced_frames_high) || - (ec->rate_sample_interval)) - return -EOPNOTSUPP; - rx_usecs = ec->rx_coalesce_usecs; rx_riwt = hw_ops->usec_to_riwt(pdata, rx_usecs); rx_frames = ec->rx_max_coalesced_frames; @@ -257,6 +242,8 @@ static void xlgmac_ethtool_get_ethtool_stats(struct net_device *netdev, } static const struct ethtool_ops xlgmac_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = xlgmac_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = xlgmac_ethtool_get_msglevel, diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 0f8a924fc60c..40a2ce0ca808 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -2373,6 +2373,8 @@ static void 
bdx_get_ethtool_stats(struct net_device *netdev, static void bdx_set_ethtool_ops(struct net_device *netdev) { static const struct ethtool_ops bdx_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = bdx_get_drvinfo, .get_link = ethtool_op_get_link, .get_coalesce = bdx_get_coalesce, diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 6ae4a72e6f43..c2c5bf87da01 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1211,6 +1211,7 @@ static int cpsw_set_channels(struct net_device *ndev, } static const struct ethtool_ops cpsw_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, .set_msglevel = cpsw_set_msglevel, diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 71215db7934b..9209e613257d 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1175,6 +1175,7 @@ static int cpsw_set_channels(struct net_device *ndev, } static const struct ethtool_ops cpsw_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, .set_msglevel = cpsw_set_msglevel, diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 75d4e16c692b..de282531f68b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -481,6 +481,7 @@ static int emac_set_coalesce(struct net_device *ndev, * Ethtool support for EMAC adapter */ static const struct ethtool_ops ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = emac_get_drvinfo, .get_link = ethtool_op_get_link, .get_coalesce = emac_get_coalesce, diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index dc022cd5bc42..3e313e71ae36 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1314,25 +1314,6 @@ static int ll_temac_ethtools_set_coalesce(struct net_device *ndev, return -EFAULT; } - if (ec->rx_coalesce_usecs_irq || - ec->rx_max_coalesced_frames_irq || - ec->tx_coalesce_usecs_irq || - ec->tx_max_coalesced_frames_irq || - ec->stats_block_coalesce_usecs || - ec->use_adaptive_rx_coalesce || - ec->use_adaptive_tx_coalesce || - ec->pkt_rate_low || - ec->rx_coalesce_usecs_low || - ec->rx_max_coalesced_frames_low || - ec->tx_coalesce_usecs_low || - ec->tx_max_coalesced_frames_low || - ec->pkt_rate_high || - ec->rx_coalesce_usecs_high || - ec->rx_max_coalesced_frames_high || - ec->tx_coalesce_usecs_high || - ec->tx_max_coalesced_frames_high || - ec->rate_sample_interval) - return -EOPNOTSUPP; if (ec->rx_max_coalesced_frames) lp->coalesce_count_rx = ec->rx_max_coalesced_frames; if (ec->tx_max_coalesced_frames) @@ -1351,6 +1332,8 @@ static int ll_temac_ethtools_set_coalesce(struct net_device *ndev, } static const struct ethtool_ops temac_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, .nway_reset = phy_ethtool_nway_reset, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index c2f4c5ca2e80..e2f3e2b0cec7 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ 
b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1309,27 +1309,6 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev, return -EFAULT; } - if ((ecoalesce->rx_coalesce_usecs) || - (ecoalesce->rx_coalesce_usecs_irq) || - (ecoalesce->rx_max_coalesced_frames_irq) || - (ecoalesce->tx_coalesce_usecs) || - (ecoalesce->tx_coalesce_usecs_irq) || - (ecoalesce->tx_max_coalesced_frames_irq) || - (ecoalesce->stats_block_coalesce_usecs) || - (ecoalesce->use_adaptive_rx_coalesce) || - (ecoalesce->use_adaptive_tx_coalesce) || - (ecoalesce->pkt_rate_low) || - (ecoalesce->rx_coalesce_usecs_low) || - (ecoalesce->rx_max_coalesced_frames_low) || - (ecoalesce->tx_coalesce_usecs_low) || - (ecoalesce->tx_max_coalesced_frames_low) || - (ecoalesce->pkt_rate_high) || - (ecoalesce->rx_coalesce_usecs_high) || - (ecoalesce->rx_max_coalesced_frames_high) || - (ecoalesce->tx_coalesce_usecs_high) || - (ecoalesce->tx_max_coalesced_frames_high) || - (ecoalesce->rate_sample_interval)) - return -EOPNOTSUPP; if (ecoalesce->rx_max_coalesced_frames) lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; if (ecoalesce->tx_max_coalesced_frames) @@ -1357,6 +1336,7 @@ axienet_ethtools_set_link_ksettings(struct net_device *ndev, } static const struct ethtool_ops axienet_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index b8cb7cadbf75..9f0d2a93379c 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -1,9 +1,9 @@ config QCOM_IPA tristate "Qualcomm IPA support" depends on ARCH_QCOM && 64BIT && NET + depends on QCOM_Q6V5_MSS select QCOM_QMI_HELPERS select QCOM_MDT_LOADER - default QCOM_Q6V5_COMMON help Choose Y or M here to include support for the Qualcomm IP Accelerator (IPA), a hardware block present in some diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 915b4cd05dd2..217cbf337ad7 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -9,7 +9,6 @@ #include <linux/slab.h> #include <linux/bitfield.h> #include <linux/if_rmnet.h> -#include <linux/version.h> #include <linux/dma-direction.h> #include "gsi.h" diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index d6e7f257e99d..28998dcce3d2 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -942,7 +942,6 @@ static struct platform_driver ipa_driver = { .remove = ipa_remove, .driver = { .name = "ipa", - .owner = THIS_MODULE, .pm = &ipa_pm_ops, .of_match_table = ipa_match, }, diff --git a/drivers/net/phy/mdio-xpcs.c b/drivers/net/phy/mdio-xpcs.c index 973f588146f7..2f4cdf807160 100644 --- a/drivers/net/phy/mdio-xpcs.c +++ b/drivers/net/phy/mdio-xpcs.c @@ -14,6 +14,7 @@ #define SYNOPSYS_XPCS_USXGMII_ID 0x7996ced0 #define SYNOPSYS_XPCS_10GKR_ID 0x7996ced0 +#define SYNOPSYS_XPCS_XLGMII_ID 0x7996ced0 #define SYNOPSYS_XPCS_MASK 0xffffffff /* Vendor regs access */ @@ -74,6 +75,36 @@ static const int xpcs_10gkr_features[] = { __ETHTOOL_LINK_MODE_MASK_NBITS, }; +static const int xpcs_xlgmii_features[] = { + ETHTOOL_LINK_MODE_Pause_BIT, + ETHTOOL_LINK_MODE_Asym_Pause_BIT, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + 
ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT, + __ETHTOOL_LINK_MODE_MASK_NBITS, +}; + static const phy_interface_t xpcs_usxgmii_interfaces[] = { PHY_INTERFACE_MODE_USXGMII, PHY_INTERFACE_MODE_MAX, @@ -84,6 +115,11 @@ static const phy_interface_t xpcs_10gkr_interfaces[] = { PHY_INTERFACE_MODE_MAX, }; +static const phy_interface_t xpcs_xlgmii_interfaces[] = { + PHY_INTERFACE_MODE_XLGMII, + PHY_INTERFACE_MODE_MAX, +}; + static struct xpcs_id { u32 id; u32 mask; @@ -100,6 +136,11 @@ static struct xpcs_id { .mask = SYNOPSYS_XPCS_MASK, .supported = xpcs_10gkr_features, .interface = xpcs_10gkr_interfaces, + }, { + .id = SYNOPSYS_XPCS_XLGMII_ID, + .mask = SYNOPSYS_XPCS_MASK, + .supported = xpcs_xlgmii_features, + .interface = xpcs_xlgmii_interfaces, }, }; @@ -458,6 +499,60 @@ static void xpcs_resolve_lpa(struct mdio_xpcs_args *xpcs, state->duplex = DUPLEX_FULL; } +static int xpcs_get_max_xlgmii_speed(struct mdio_xpcs_args *xpcs, + struct phylink_link_state *state) +{ + unsigned long *adv = state->advertising; + int speed = SPEED_UNKNOWN; + int bit; + + for_each_set_bit(bit, adv, __ETHTOOL_LINK_MODE_MASK_NBITS) { + int new_speed = SPEED_UNKNOWN; + + switch (bit) { + case ETHTOOL_LINK_MODE_25000baseCR_Full_BIT: + case ETHTOOL_LINK_MODE_25000baseKR_Full_BIT: + case ETHTOOL_LINK_MODE_25000baseSR_Full_BIT: + new_speed = SPEED_25000; + break; + case ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT: + case ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT: + case ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT: + case ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT: + new_speed = SPEED_40000; + break; + case ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseKR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseSR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseCR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT: + case ETHTOOL_LINK_MODE_50000baseDR_Full_BIT: + new_speed = SPEED_50000; + break; + case ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT: + case ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT: + new_speed = SPEED_100000; + break; + default: + continue; + } + + if (new_speed > speed) + speed = new_speed; + } + + return speed; +} + static void xpcs_resolve_pma(struct mdio_xpcs_args *xpcs, struct phylink_link_state *state) { @@ -468,6 +563,9 @@ static void xpcs_resolve_pma(struct mdio_xpcs_args *xpcs, case PHY_INTERFACE_MODE_10GKR: 
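xpcs_get_max_xlgmii_speed() above resolves the XLGMII link speed by scanning the advertising bitmap and keeping the fastest mode seen, since there is no single speed status field to read for this interface. A toy, self-contained version of the same scan (the bit numbering is invented; the kernel walks the real ethtool link-mode bitmap):

#include <stddef.h>
#include <stdio.h>

struct mode { int bit; int speed; };

static const struct mode modes[] = {
        { 0, 25000 }, { 1, 40000 }, { 2, 50000 }, { 3, 100000 },
};

static int max_speed(unsigned long adv)
{
        int best = -1;

        for (size_t i = 0; i < sizeof(modes) / sizeof(modes[0]); i++)
                if (((adv >> modes[i].bit) & 1) && modes[i].speed > best)
                        best = modes[i].speed;
        return best;
}

int main(void)
{
        printf("%d\n", max_speed(0xA));  /* bits 1 and 3 set -> 100000 */
        return 0;
}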
state->speed = SPEED_10000; break; + case PHY_INTERFACE_MODE_XLGMII: + state->speed = xpcs_get_max_xlgmii_speed(xpcs, state); + break; default: state->speed = SPEED_UNKNOWN; break; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 3ab9ca7614d1..522760c8bca6 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -825,6 +825,38 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) EXPORT_SYMBOL(__mdiobus_write); /** + * __mdiobus_modify_changed - Unlocked version of the mdiobus_modify function + * @bus: the mii_bus struct + * @addr: the phy address + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Read, modify, and if any change, write the register value back to the + * device. Any error returns a negative number. + * + * NOTE: MUST NOT be called from interrupt context. + */ +int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, + u16 mask, u16 set) +{ + int new, ret; + + ret = __mdiobus_read(bus, addr, regnum); + if (ret < 0) + return ret; + + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __mdiobus_write(bus, addr, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__mdiobus_modify_changed); + +/** * mdiobus_read_nested - Nested version of the mdiobus_read function * @bus: the mii_bus struct * @addr: the phy address @@ -841,7 +873,8 @@ int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum) { int retval; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); retval = __mdiobus_read(bus, addr, regnum); @@ -865,7 +898,8 @@ int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) { int retval; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock(&bus->mdio_lock); retval = __mdiobus_read(bus, addr, regnum); @@ -893,7 +927,8 @@ int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val) { int err; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); err = __mdiobus_write(bus, addr, regnum, val); @@ -918,7 +953,8 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) { int err; - BUG_ON(in_interrupt()); + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; mutex_lock(&bus->mdio_lock); err = __mdiobus_write(bus, addr, regnum, val); @@ -929,6 +965,30 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) EXPORT_SYMBOL(mdiobus_write); /** + * mdiobus_modify - Convenience function for modifying a given mdio device + * register + * @bus: the mii_bus struct + * @addr: the phy address + * @regnum: register number to write + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + */ +int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set) +{ + int err; + + if (WARN_ON_ONCE(in_interrupt())) + return -EINVAL; + + mutex_lock(&bus->mdio_lock); + err = __mdiobus_modify_changed(bus, addr, regnum, mask, set); + mutex_unlock(&bus->mdio_lock); + + return err < 0 ? 
err : 0; +} +EXPORT_SYMBOL_GPL(mdiobus_modify); + +/** * mdio_bus_match - determine if given MDIO driver supports the given * MDIO device * @dev: target MDIO device diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 29ccb2c9c095..25729302714c 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -161,6 +161,20 @@ enum rgmii_rx_clock_delay { /* Extended Page 2 Registers */ #define MSCC_PHY_CU_PMD_TX_CNTL 16 +#define MSCC_PHY_RGMII_SETTINGS 18 +#define RGMII_SKEW_RX_POS 1 +#define RGMII_SKEW_TX_POS 4 + +/* RGMII skew values, in ns */ +#define VSC8584_RGMII_SKEW_0_2 0 +#define VSC8584_RGMII_SKEW_0_8 1 +#define VSC8584_RGMII_SKEW_1_1 2 +#define VSC8584_RGMII_SKEW_1_7 3 +#define VSC8584_RGMII_SKEW_2_0 4 +#define VSC8584_RGMII_SKEW_2_3 5 +#define VSC8584_RGMII_SKEW_2_6 6 +#define VSC8584_RGMII_SKEW_3_4 7 + #define MSCC_PHY_RGMII_CNTL 20 #define RGMII_RX_CLK_DELAY_MASK 0x0070 #define RGMII_RX_CLK_DELAY_POS 4 @@ -241,6 +255,7 @@ enum rgmii_rx_clock_delay { #define MAC_CFG_MASK 0xc000 #define MAC_CFG_SGMII 0x0000 #define MAC_CFG_QSGMII 0x4000 +#define MAC_CFG_RGMII 0x8000 /* Test page Registers */ #define MSCC_PHY_TEST_PAGE_5 5 diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index cb4d65f81095..5d78732de702 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -1288,6 +1288,32 @@ static bool vsc8584_is_pkg_init(struct phy_device *phydev, bool reversed) return false; } +static void vsc8584_rgmii_set_skews(struct phy_device *phydev) +{ + u32 skew_rx, skew_tx; + + /* We first set the Rx and Tx skews to their default value in h/w + * (0.2 ns). + */ + skew_rx = VSC8584_RGMII_SKEW_0_2; + skew_tx = VSC8584_RGMII_SKEW_0_2; + + /* We then set the skews based on the interface mode. */ + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + skew_rx = VSC8584_RGMII_SKEW_2_0; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + skew_tx = VSC8584_RGMII_SKEW_2_0; + + /* Finally we apply the skews configuration. 
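The skew programming above encodes RGMII internal delays: the spec wants the receive clock to arrive roughly 2 ns after the data edges, and the phy-mode suffix says who inserts that delay, with the "-id"/"-rxid"/"-txid" modes asking the PHY to add it on both/RX/TX. A stand-alone model of the mapping vsc8584_rgmii_set_skews() implements (values in tenths of a nanosecond):

#include <stdio.h>

enum rgmii_mode { RGMII, RGMII_ID, RGMII_RXID, RGMII_TXID };

static void rgmii_skews(enum rgmii_mode m, int *rx, int *tx)
{
        *rx = *tx = 2;                          /* 0.2 ns hardware default */
        if (m == RGMII_ID || m == RGMII_RXID)
                *rx = 20;                       /* 2.0 ns */
        if (m == RGMII_ID || m == RGMII_TXID)
                *tx = 20;
}

int main(void)
{
        int rx, tx;

        rgmii_skews(RGMII_RXID, &rx, &tx);
        printf("rx %.1f ns, tx %.1f ns\n", rx / 10.0, tx / 10.0);
        return 0;
}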
*/ + phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2, + MSCC_PHY_RGMII_SETTINGS, + (0x7 << RGMII_SKEW_RX_POS) | (0x7 << RGMII_SKEW_TX_POS), + (skew_rx << RGMII_SKEW_RX_POS) | + (skew_tx << RGMII_SKEW_TX_POS)); +} + static int vsc8584_config_init(struct phy_device *phydev) { struct vsc8531_private *vsc8531 = phydev->priv; @@ -1360,27 +1386,35 @@ static int vsc8584_config_init(struct phy_device *phydev) val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK); val &= ~MAC_CFG_MASK; - if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) + if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) { val |= MAC_CFG_QSGMII; - else + } else if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { val |= MAC_CFG_SGMII; + } else if (phy_interface_is_rgmii(phydev)) { + val |= MAC_CFG_RGMII; + } else { + ret = -EINVAL; + goto err; + } ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val); if (ret) goto err; - val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT | - PROC_CMD_READ_MOD_WRITE_PORT; - if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) - val |= PROC_CMD_QSGMII_MAC; - else - val |= PROC_CMD_SGMII_MAC; + if (!phy_interface_is_rgmii(phydev)) { + val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT | + PROC_CMD_READ_MOD_WRITE_PORT; + if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) + val |= PROC_CMD_QSGMII_MAC; + else + val |= PROC_CMD_SGMII_MAC; - ret = vsc8584_cmd(phydev, val); - if (ret) - goto err; + ret = vsc8584_cmd(phydev, val); + if (ret) + goto err; - usleep_range(10000, 20000); + usleep_range(10000, 20000); + } /* Disable SerDes for 100Base-FX */ ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF | @@ -1411,6 +1445,11 @@ static int vsc8584_config_init(struct phy_device *phydev) val |= (MEDIA_OP_MODE_COPPER << MEDIA_OP_MODE_POS) | (VSC8584_MAC_IF_SELECTION_SGMII << VSC8584_MAC_IF_SELECTION_POS); ret = phy_write(phydev, MSCC_PHY_EXT_PHY_CNTL_1, val); + if (ret) + return ret; + + if (phy_interface_is_rgmii(phydev)) + vsc8584_rgmii_set_skews(phydev); ret = genphy_soft_reset(phydev); if (ret) @@ -1429,11 +1468,21 @@ err: return ret; } -static int vsc8584_handle_interrupt(struct phy_device *phydev) +static irqreturn_t vsc8584_handle_interrupt(struct phy_device *phydev) { - vsc8584_handle_macsec_interrupt(phydev); - phy_mac_interrupt(phydev); - return 0; + int irq_status; + + irq_status = phy_read(phydev, MII_VSC85XX_INT_STATUS); + if (irq_status < 0 || !(irq_status & MII_VSC85XX_INT_MASK_MASK)) + return IRQ_NONE; + + if (irq_status & MII_VSC85XX_INT_MASK_EXT) + vsc8584_handle_macsec_interrupt(phydev); + + if (irq_status & MII_VSC85XX_INT_MASK_LINK_CHG) + phy_mac_interrupt(phydev); + + return IRQ_HANDLED; } static int vsc85xx_config_init(struct phy_device *phydev) diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index e083e7a76ada..94cd85b1e49b 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -489,37 +489,6 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) EXPORT_SYMBOL(phy_write_mmd); /** - * __phy_modify_changed() - Convenience function for modifying a PHY register - * @phydev: a pointer to a &struct phy_device - * @regnum: register number - * @mask: bit mask of bits to clear - * @set: bit mask of bits to set - * - * Unlocked helper function which allows a PHY register to be modified as - * new register value = (old register value & ~mask) | set - * - * Returns negative errno, 0 if there was no change, and 1 in case of change - */ -int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 
mask, - u16 set) -{ - int new, ret; - - ret = __phy_read(phydev, regnum); - if (ret < 0) - return ret; - - new = (ret & ~mask) | set; - if (new == ret) - return 0; - - ret = __phy_write(phydev, regnum, new); - - return ret < 0 ? ret : 1; -} -EXPORT_SYMBOL_GPL(__phy_modify_changed); - -/** * phy_modify_changed - Function for modifying a PHY register * @phydev: the phy_device struct * @regnum: register number to modify diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 355bfdef48d2..d71212a418f3 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -715,26 +715,24 @@ static int phy_disable_interrupts(struct phy_device *phydev) static irqreturn_t phy_interrupt(int irq, void *phy_dat) { struct phy_device *phydev = phy_dat; + struct phy_driver *drv = phydev->drv; - if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) + if (drv->handle_interrupt) + return drv->handle_interrupt(phydev); + + if (drv->did_interrupt && !drv->did_interrupt(phydev)) return IRQ_NONE; - if (phydev->drv->handle_interrupt) { - if (phydev->drv->handle_interrupt(phydev)) - goto phy_err; - } else { - /* reschedule state queue work to run as soon as possible */ - phy_trigger_machine(phydev); - } + /* reschedule state queue work to run as soon as possible */ + phy_trigger_machine(phydev); /* did_interrupt() may have cleared the interrupt already */ - if (!phydev->drv->did_interrupt && phy_clear_interrupt(phydev)) - goto phy_err; - return IRQ_HANDLED; + if (!drv->did_interrupt && phy_clear_interrupt(phydev)) { + phy_error(phydev); + return IRQ_NONE; + } -phy_err: - phy_error(phydev); - return IRQ_NONE; + return IRQ_HANDLED; } /** diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 60f32b354013..fed0c5907c6a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2068,4 +2068,240 @@ void phylink_helper_basex_speed(struct phylink_link_state *state) } EXPORT_SYMBOL_GPL(phylink_helper_basex_speed); +static void phylink_decode_c37_word(struct phylink_link_state *state, + uint16_t config_reg, int speed) +{ + bool tx_pause, rx_pause; + int fd_bit; + + if (speed == SPEED_2500) + fd_bit = ETHTOOL_LINK_MODE_2500baseX_Full_BIT; + else + fd_bit = ETHTOOL_LINK_MODE_1000baseX_Full_BIT; + + mii_lpa_mod_linkmode_x(state->lp_advertising, config_reg, fd_bit); + + if (linkmode_test_bit(fd_bit, state->advertising) && + linkmode_test_bit(fd_bit, state->lp_advertising)) { + state->speed = speed; + state->duplex = DUPLEX_FULL; + } else { + /* negotiation failure */ + state->link = false; + } + + linkmode_resolve_pause(state->advertising, state->lp_advertising, + &tx_pause, &rx_pause); + + if (tx_pause) + state->pause |= MLO_PAUSE_TX; + if (rx_pause) + state->pause |= MLO_PAUSE_RX; +} + +static void phylink_decode_sgmii_word(struct phylink_link_state *state, + uint16_t config_reg) +{ + if (!(config_reg & LPA_SGMII_LINK)) { + state->link = false; + return; + } + + switch (config_reg & LPA_SGMII_SPD_MASK) { + case LPA_SGMII_10: + state->speed = SPEED_10; + break; + case LPA_SGMII_100: + state->speed = SPEED_100; + break; + case LPA_SGMII_1000: + state->speed = SPEED_1000; + break; + default: + state->link = false; + return; + } + if (config_reg & LPA_SGMII_FULL_DUPLEX) + state->duplex = DUPLEX_FULL; + else + state->duplex = DUPLEX_HALF; +} + +/** + * phylink_mii_c22_pcs_get_state() - read the MAC PCS state + * @pcs: a pointer to a &struct mdio_device. + * @state: a pointer to a &struct phylink_link_state. 
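phylink_decode_sgmii_word() above parses the Cisco SGMII auto-negotiation word, whose layout puts link in bit 15, duplex in bit 12 and speed in bits 11:10 (00/01/10 selecting 10/100/1000). A self-contained toy decoder mirroring that logic (the kernel uses the LPA_SGMII_* macros rather than raw masks):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool sgmii_decode(uint16_t w, int *speed, bool *full_duplex)
{
        static const int spd[] = { 10, 100, 1000, -1 };

        if (!(w & 0x8000))
                return false;                   /* no link */
        *speed = spd[(w >> 10) & 3];
        *full_duplex = !!(w & 0x1000);
        return *speed > 0;                      /* 0b11 speed is reserved */
}

int main(void)
{
        int speed;
        bool fd;

        if (sgmii_decode(0x9800, &speed, &fd))  /* link up, 1000, full */
                printf("%d Mb/s %s duplex\n", speed, fd ? "full" : "half");
        return 0;
}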
+ * + * Helper for MAC PCS supporting the 802.3 clause 22 register set for + * clause 37 negotiation and/or SGMII control. + * + * Read the MAC PCS state from the MII device configured in @config and + * parse the Clause 37 or Cisco SGMII link partner negotiation word into + * the phylink @state structure. This is suitable to be directly plugged + * into the mac_pcs_get_state() member of the struct phylink_mac_ops + * structure. + */ +void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state) +{ + struct mii_bus *bus = pcs->bus; + int addr = pcs->addr; + int bmsr, lpa; + + bmsr = mdiobus_read(bus, addr, MII_BMSR); + lpa = mdiobus_read(bus, addr, MII_LPA); + if (bmsr < 0 || lpa < 0) { + state->link = false; + return; + } + + state->link = !!(bmsr & BMSR_LSTATUS); + state->an_complete = !!(bmsr & BMSR_ANEGCOMPLETE); + if (!state->link) + return; + + switch (state->interface) { + case PHY_INTERFACE_MODE_1000BASEX: + phylink_decode_c37_word(state, lpa, SPEED_1000); + break; + + case PHY_INTERFACE_MODE_2500BASEX: + phylink_decode_c37_word(state, lpa, SPEED_2500); + break; + + case PHY_INTERFACE_MODE_SGMII: + phylink_decode_sgmii_word(state, lpa); + break; + + default: + state->link = false; + break; + } +} +EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_get_state); + +/** + * phylink_mii_c22_pcs_set_advertisement() - configure the clause 37 PCS + * advertisement + * @pcs: a pointer to a &struct mdio_device. + * @state: a pointer to the state being configured. + * + * Helper for MAC PCS supporting the 802.3 clause 22 register set for + * clause 37 negotiation and/or SGMII control. + * + * Configure the clause 37 PCS advertisement as specified by @state. This + * does not trigger a renegotiation; phylink will do that via the + * mac_an_restart() method of the struct phylink_mac_ops structure. + * + * Returns negative error code on failure to configure the advertisement, + * zero if no change has been made, or one if the advertisement has changed. + */ +int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, + const struct phylink_link_state *state) +{ + struct mii_bus *bus = pcs->bus; + int addr = pcs->addr; + int val, ret; + u16 adv; + + switch (state->interface) { + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + adv = ADVERTISE_1000XFULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + state->advertising)) + adv |= ADVERTISE_1000XPAUSE; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + state->advertising)) + adv |= ADVERTISE_1000XPSE_ASYM; + + val = mdiobus_read(bus, addr, MII_ADVERTISE); + if (val < 0) + return val; + + if (val == adv) + return 0; + + ret = mdiobus_write(bus, addr, MII_ADVERTISE, adv); + if (ret < 0) + return ret; + + return 1; + + case PHY_INTERFACE_MODE_SGMII: + val = mdiobus_read(bus, addr, MII_ADVERTISE); + if (val < 0) + return val; + + if (val == 0x0001) + return 0; + + ret = mdiobus_write(bus, addr, MII_ADVERTISE, 0x0001); + if (ret < 0) + return ret; + + return 1; + + default: + /* Nothing to do for other modes */ + return 0; + } +} +EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_set_advertisement); + +/** + * phylink_mii_c22_pcs_an_restart() - restart 802.3z autonegotiation + * @pcs: a pointer to a &struct mdio_device. + * + * Helper for MAC PCS supporting the 802.3 clause 22 register set for + * clause 37 negotiation. + * + * Restart the clause 37 negotiation with the link partner. 
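phylink_mii_c22_pcs_set_advertisement() above deliberately returns a negative errno, 0 for "no change written" or 1 for "advertisement rewritten", so the caller can restart clause 37 autonegotiation only when something actually changed on the wire. A stand-alone model of that contract (names invented):

#include <stdio.h>

/* write the advertisement only if it differs; report whether we did */
static int pcs_set_adv(int *reg, int want)
{
        if (*reg == want)
                return 0;
        *reg = want;
        return 1;
}

int main(void)
{
        int reg = 0x0020, restarts = 0;

        for (int i = 0; i < 2; i++)
                if (pcs_set_adv(&reg, 0x00a0) > 0)
                        restarts++;             /* restart AN once only */
        printf("restarts=%d\n", restarts);      /* prints 1 */
        return 0;
}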
MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index f5fa2fff3ddc..2d99e9de6ee1 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -49,6 +49,8 @@ #define RTL_LPADV_5000FULL BIT(6) #define RTL_LPADV_2500FULL BIT(5) +#define RTLGEN_SPEED_MASK 0x0630 + #define RTL_GENERIC_PHYID 0x001cc800 MODULE_DESCRIPTION("Realtek PHY driver"); @@ -309,6 +311,55 @@ static int rtl8366rb_config_init(struct phy_device *phydev) return ret; } +/* get actual speed to cover the downshift case */ +static int rtlgen_get_speed(struct phy_device *phydev) +{ + int val; + + if (!phydev->link) + return 0; + + val = phy_read_paged(phydev, 0xa43, 0x12); + if (val < 0) + return val; + + switch (val & RTLGEN_SPEED_MASK) { + case 0x0000: + phydev->speed = SPEED_10; + break; + case 0x0010: + phydev->speed = SPEED_100; + break; + case 0x0020: + phydev->speed = SPEED_1000; + break; + case 0x0200: + phydev->speed = SPEED_10000; + break; + case 0x0210: + phydev->speed = SPEED_2500; + break; + case 0x0220: + phydev->speed = SPEED_5000; + break; + default: + break; + } + + return 0; +} + +static int rtlgen_read_status(struct phy_device *phydev) +{ + int ret; + + ret = genphy_read_status(phydev); + if (ret < 0) + return ret; + + return rtlgen_get_speed(phydev); +} + static int rtlgen_read_mmd(struct phy_device *phydev, int devnum, u16 regnum) { int ret; @@ -429,6 +480,8 @@ static int rtl8125_config_aneg(struct phy_device *phydev) static int rtl8125_read_status(struct phy_device *phydev) { + int ret; + if (phydev->autoneg == AUTONEG_ENABLE) { int lpadv = phy_read_paged(phydev, 0xa5d, 0x13); @@ -443,7 +496,11 @@ static int rtl8125_read_status(struct phy_device *phydev) phydev->lp_advertising, lpadv & RTL_LPADV_2500FULL); } - return genphy_read_status(phydev); + ret = genphy_read_status(phydev); + if (ret < 0) + return ret; + + return rtlgen_get_speed(phydev); } static bool rtlgen_supports_2_5gbps(struct phy_device *phydev) @@ -550,6 +607,7 @@ static struct phy_driver realtek_drvs[] = { }, { .name = "Generic FE-GE Realtek PHY", .match_phy_device = rtlgen_match_phy_device, + .read_status = rtlgen_read_status, .suspend = genphy_suspend, .resume = genphy_resume, .read_page = rtl821x_read_page, diff --git a/drivers/net/veth.c b/drivers/net/veth.c index d4cbb9e8c63f..b6505a6c7102 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -34,16 +34,23 @@ #define VETH_RING_SIZE 256 #define
VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) -/* Separating two types of XDP xmit */ -#define VETH_XDP_TX BIT(0) -#define VETH_XDP_REDIR BIT(1) - #define VETH_XDP_TX_BULK_SIZE 16 +struct veth_stats { + u64 rx_drops; + /* xdp */ + u64 xdp_packets; + u64 xdp_bytes; + u64 xdp_redirect; + u64 xdp_drops; + u64 xdp_tx; + u64 xdp_tx_err; + u64 xdp_xmit; + u64 xdp_xmit_err; +}; + struct veth_rq_stats { - u64 xdp_packets; - u64 xdp_bytes; - u64 xdp_drops; + struct veth_stats vs; struct u64_stats_sync syncp; }; @@ -80,12 +87,18 @@ struct veth_q_stat_desc { size_t offset; }; -#define VETH_RQ_STAT(m) offsetof(struct veth_rq_stats, m) +#define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) static const struct veth_q_stat_desc veth_rq_stats_desc[] = { { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, - { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, + { "rx_drops", VETH_RQ_STAT(rx_drops) }, + { "rx_xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, + { "rx_xdp_drops", VETH_RQ_STAT(xdp_drops) }, + { "rx_xdp_tx", VETH_RQ_STAT(xdp_tx) }, + { "rx_xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, + { "tx_xdp_xmit", VETH_RQ_STAT(xdp_xmit) }, + { "tx_xdp_xmit_errors", VETH_RQ_STAT(xdp_xmit_err) }, }; #define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) @@ -124,7 +137,7 @@ static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) for (i = 0; i < dev->real_num_rx_queues; i++) { for (j = 0; j < VETH_RQ_STATS_LEN; j++) { snprintf(p, ETH_GSTRING_LEN, - "rx_queue_%u_%.11s", + "rx_queue_%u_%.18s", i, veth_rq_stats_desc[j].desc); p += ETH_GSTRING_LEN; } @@ -155,7 +168,7 @@ static void veth_get_ethtool_stats(struct net_device *dev, idx = 1; for (i = 0; i < dev->real_num_rx_queues; i++) { const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; - const void *stats_base = (void *)rq_stats; + const void *stats_base = (void *)&rq_stats->vs; unsigned int start; size_t offset; @@ -283,28 +296,34 @@ static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) return atomic64_read(&priv->dropped); } -static void veth_stats_rx(struct veth_rq_stats *result, struct net_device *dev) +static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); int i; + result->xdp_xmit_err = 0; result->xdp_packets = 0; + result->xdp_tx_err = 0; result->xdp_bytes = 0; - result->xdp_drops = 0; + result->rx_drops = 0; for (i = 0; i < dev->num_rx_queues; i++) { + u64 packets, bytes, drops, xdp_tx_err, xdp_xmit_err; struct veth_rq_stats *stats = &priv->rq[i].stats; - u64 packets, bytes, drops; unsigned int start; do { start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->xdp_packets; - bytes = stats->xdp_bytes; - drops = stats->xdp_drops; + xdp_xmit_err = stats->vs.xdp_xmit_err; + xdp_tx_err = stats->vs.xdp_tx_err; + packets = stats->vs.xdp_packets; + bytes = stats->vs.xdp_bytes; + drops = stats->vs.rx_drops; } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + result->xdp_xmit_err += xdp_xmit_err; + result->xdp_tx_err += xdp_tx_err; result->xdp_packets += packets; result->xdp_bytes += bytes; - result->xdp_drops += drops; + result->rx_drops += drops; } } @@ -313,7 +332,7 @@ static void veth_get_stats64(struct net_device *dev, { struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; - struct veth_rq_stats rx; + struct veth_stats rx; u64 packets, bytes; tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); @@ -321,7 +340,8 @@ static void veth_get_stats64(struct net_device *dev, 
tot->tx_packets = packets; veth_stats_rx(&rx, dev); - tot->rx_dropped = rx.xdp_drops; + tot->tx_dropped += rx.xdp_xmit_err + rx.xdp_tx_err; + tot->rx_dropped = rx.rx_drops; tot->rx_bytes = rx.xdp_bytes; tot->rx_packets = rx.xdp_packets; @@ -333,6 +353,7 @@ static void veth_get_stats64(struct net_device *dev, tot->rx_packets += packets; veth_stats_rx(&rx, peer); + tot->rx_dropped += rx.xdp_xmit_err + rx.xdp_tx_err; tot->tx_bytes += rx.xdp_bytes; tot->tx_packets += rx.xdp_packets; } @@ -369,28 +390,32 @@ static int veth_select_rxq(struct net_device *dev) } static int veth_xdp_xmit(struct net_device *dev, int n, - struct xdp_frame **frames, u32 flags) + struct xdp_frame **frames, + u32 flags, bool ndo_xmit) { struct veth_priv *rcv_priv, *priv = netdev_priv(dev); + unsigned int qidx, max_len; struct net_device *rcv; int i, ret, drops = n; - unsigned int max_len; struct veth_rq *rq; rcu_read_lock(); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { - ret = -EINVAL; - goto drop; + rcu_read_unlock(); + atomic64_add(drops, &priv->dropped); + return -EINVAL; } rcv = rcu_dereference(priv->peer); if (unlikely(!rcv)) { - ret = -ENXIO; - goto drop; + rcu_read_unlock(); + atomic64_add(drops, &priv->dropped); + return -ENXIO; } rcv_priv = netdev_priv(rcv); - rq = &rcv_priv->rq[veth_select_rxq(rcv)]; + qidx = veth_select_rxq(rcv); + rq = &rcv_priv->rq[qidx]; /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive * side. This means an XDP program is loaded on the peer and the peer * device is up. @@ -419,24 +444,35 @@ static int veth_xdp_xmit(struct net_device *dev, int n, if (flags & XDP_XMIT_FLUSH) __veth_xdp_flush(rq); - if (likely(!drops)) { - rcu_read_unlock(); - return n; - } - ret = n - drops; drop: + rq = &priv->rq[qidx]; + u64_stats_update_begin(&rq->stats.syncp); + if (ndo_xmit) { + rq->stats.vs.xdp_xmit += n - drops; + rq->stats.vs.xdp_xmit_err += drops; + } else { + rq->stats.vs.xdp_tx += n - drops; + rq->stats.vs.xdp_tx_err += drops; + } + u64_stats_update_end(&rq->stats.syncp); + rcu_read_unlock(); - atomic64_add(drops, &priv->dropped); return ret; } +static int veth_ndo_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + return veth_xdp_xmit(dev, n, frames, flags, true); +} + static void veth_xdp_flush_bq(struct net_device *dev, struct veth_xdp_tx_bq *bq) { int sent, i, err = 0; - sent = veth_xdp_xmit(dev, bq->count, bq->q, 0); + sent = veth_xdp_xmit(dev, bq->count, bq->q, 0, false); if (sent < 0) { err = sent; sent = 0; @@ -489,8 +525,8 @@ static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp, static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, struct xdp_frame *frame, - unsigned int *xdp_xmit, - struct veth_xdp_tx_bq *bq) + struct veth_xdp_tx_bq *bq, + struct veth_stats *stats) { void *hard_start = frame->data - frame->headroom; void *head = hard_start - sizeof(struct xdp_frame); @@ -526,9 +562,10 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) { trace_xdp_exception(rq->dev, xdp_prog, act); frame = &orig_frame; + stats->rx_drops++; goto err_xdp; } - *xdp_xmit |= VETH_XDP_TX; + stats->xdp_tx++; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: @@ -537,9 +574,10 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, xdp.rxq->mem = frame->mem; if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { frame = &orig_frame; + stats->rx_drops++; goto err_xdp; } - *xdp_xmit |= VETH_XDP_REDIR; + stats->xdp_redirect++; rcu_read_unlock(); goto xdp_xmit; default: @@ -549,6 
+587,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, trace_xdp_exception(rq->dev, xdp_prog, act); /* fall through */ case XDP_DROP: + stats->xdp_drops++; goto err_xdp; } } @@ -558,6 +597,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, skb = veth_build_skb(head, headroom, len, 0); if (!skb) { xdp_return_frame(frame); + stats->rx_drops++; goto err; } @@ -573,9 +613,10 @@ xdp_xmit: return NULL; } -static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, - unsigned int *xdp_xmit, - struct veth_xdp_tx_bq *bq) +static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, + struct sk_buff *skb, + struct veth_xdp_tx_bq *bq, + struct veth_stats *stats) { u32 pktlen, headroom, act, metalen; void *orig_data, *orig_data_end; @@ -653,18 +694,21 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, xdp.rxq->mem = rq->xdp_mem; if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) { trace_xdp_exception(rq->dev, xdp_prog, act); + stats->rx_drops++; goto err_xdp; } - *xdp_xmit |= VETH_XDP_TX; + stats->xdp_tx++; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: get_page(virt_to_page(xdp.data)); consume_skb(skb); xdp.rxq->mem = rq->xdp_mem; - if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) + if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { + stats->rx_drops++; goto err_xdp; - *xdp_xmit |= VETH_XDP_REDIR; + } + stats->xdp_redirect++; rcu_read_unlock(); goto xdp_xmit; default: @@ -674,7 +718,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, trace_xdp_exception(rq->dev, xdp_prog, act); /* fall through */ case XDP_DROP: - goto drop; + stats->xdp_drops++; + goto xdp_drop; } rcu_read_unlock(); @@ -696,6 +741,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, out: return skb; drop: + stats->rx_drops++; +xdp_drop: rcu_read_unlock(); kfree_skb(skb); return NULL; @@ -706,14 +753,14 @@ xdp_xmit: return NULL; } -static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit, - struct veth_xdp_tx_bq *bq) +static int veth_xdp_rcv(struct veth_rq *rq, int budget, + struct veth_xdp_tx_bq *bq, + struct veth_stats *stats) { - int i, done = 0, drops = 0, bytes = 0; + int i, done = 0; for (i = 0; i < budget; i++) { void *ptr = __ptr_ring_consume(&rq->xdp_ring); - unsigned int xdp_xmit_one = 0; struct sk_buff *skb; if (!ptr) @@ -722,27 +769,26 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit, if (veth_is_xdp_frame(ptr)) { struct xdp_frame *frame = veth_ptr_to_xdp(ptr); - bytes += frame->len; - skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one, bq); + stats->xdp_bytes += frame->len; + skb = veth_xdp_rcv_one(rq, frame, bq, stats); } else { skb = ptr; - bytes += skb->len; - skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one, bq); + stats->xdp_bytes += skb->len; + skb = veth_xdp_rcv_skb(rq, skb, bq, stats); } - *xdp_xmit |= xdp_xmit_one; if (skb) napi_gro_receive(&rq->xdp_napi, skb); - else if (!xdp_xmit_one) - drops++; done++; } u64_stats_update_begin(&rq->stats.syncp); - rq->stats.xdp_packets += done; - rq->stats.xdp_bytes += bytes; - rq->stats.xdp_drops += drops; + rq->stats.vs.xdp_redirect += stats->xdp_redirect; + rq->stats.vs.xdp_bytes += stats->xdp_bytes; + rq->stats.vs.xdp_drops += stats->xdp_drops; + rq->stats.vs.rx_drops += stats->rx_drops; + rq->stats.vs.xdp_packets += done; u64_stats_update_end(&rq->stats.syncp); return done; @@ -752,14 +798,14 @@ static int veth_poll(struct napi_struct *napi, int budget) { struct veth_rq *rq = 
container_of(napi, struct veth_rq, xdp_napi); - unsigned int xdp_xmit = 0; + struct veth_stats stats = {}; struct veth_xdp_tx_bq bq; int done; bq.count = 0; xdp_set_return_frame_no_direct(); - done = veth_xdp_rcv(rq, budget, &xdp_xmit, &bq); + done = veth_xdp_rcv(rq, budget, &bq, &stats); if (done < budget && napi_complete_done(napi, done)) { /* Write rx_notify_masked before reading ptr_ring */ @@ -770,9 +816,9 @@ static int veth_poll(struct napi_struct *napi, int budget) } } - if (xdp_xmit & VETH_XDP_TX) + if (stats.xdp_tx > 0) veth_xdp_flush(rq->dev, &bq); - if (xdp_xmit & VETH_XDP_REDIR) + if (stats.xdp_redirect > 0) xdp_do_flush(); xdp_clear_return_frame_no_direct(); @@ -1158,7 +1204,7 @@ static const struct net_device_ops veth_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = veth_set_rx_headroom, .ndo_bpf = veth_xdp, - .ndo_xdp_xmit = veth_xdp_xmit, + .ndo_xdp_xmit = veth_ndo_xdp_xmit, }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 56084635dd63..ffdb5bc25d6d 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -128,6 +128,7 @@ config QCOM_Q6V5_MSS select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_Q6V5_COMMON + select QCOM_Q6V5_IPA_NOTIFY select QCOM_RPROC_COMMON select QCOM_SCM help @@ -169,9 +170,6 @@ config QCOM_Q6V5_WCSS config QCOM_Q6V5_IPA_NOTIFY tristate - depends on QCOM_IPA - depends on QCOM_Q6V5_MSS - default QCOM_IPA config QCOM_SYSMON tristate "Qualcomm sysmon driver" diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 962be94ed3ca..6eb431c194bd 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -847,11 +847,6 @@ struct qeth_trap_id { /*some helper functions*/ #define QETH_CARD_IFNAME(card) (((card)->dev)? 
(card)->dev->name : "") -static inline bool qeth_netdev_is_registered(struct net_device *dev) -{ - return dev->netdev_ops != NULL; -} - static inline u16 qeth_iqd_translate_txq(struct net_device *dev, u16 txq) { if (txq == QETH_IQD_MCAST_TXQ) @@ -1053,6 +1048,7 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq); int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); void qeth_trace_features(struct qeth_card *); int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long); +int qeth_setup_netdev(struct qeth_card *card); int qeth_set_features(struct net_device *, netdev_features_t); void qeth_enable_hw_features(struct net_device *dev); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); @@ -1060,6 +1056,7 @@ netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); +int qeth_set_real_num_tx_queues(struct qeth_card *card, unsigned int count); u16 qeth_iqd_select_queue(struct net_device *dev, struct sk_buff *skb, u8 cast_type, struct net_device *sb_dev); int qeth_open(struct net_device *dev); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6caa78d51bd1..bd3adbb6ad50 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -244,7 +244,7 @@ static struct qeth_buffer_pool_entry *qeth_alloc_pool_entry(unsigned int pages) return NULL; for (i = 0; i < pages; i++) { - entry->elements[i] = alloc_page(GFP_KERNEL); + entry->elements[i] = __dev_alloc_page(GFP_KERNEL); if (!entry->elements[i]) { qeth_free_pool_entry(entry); @@ -538,9 +538,10 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, for (i = 0; i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); i++) { - if (aob->sba[i] && buffer->is_header[i]) - kmem_cache_free(qeth_core_header_cache, - (void *) aob->sba[i]); + void *data = phys_to_virt(aob->sba[i]); + + if (data && buffer->is_header[i]) + kmem_cache_free(qeth_core_header_cache, data); } atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED); @@ -1244,9 +1245,12 @@ EXPORT_SYMBOL_GPL(qeth_drain_output_queues); static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) { - unsigned int count = single ? 1 : card->dev->num_tx_queues; + unsigned int max = single ? 1 : card->dev->num_tx_queues; + unsigned int count; int rc; + count = IS_VM_NIC(card) ? 
min(max, card->dev->real_num_tx_queues) : max; + rtnl_lock(); rc = netif_set_real_num_tx_queues(card->dev, count); rtnl_unlock(); @@ -1254,16 +1258,16 @@ static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) if (rc) return rc; - if (card->qdio.no_out_queues == count) + if (card->qdio.no_out_queues == max) return 0; if (atomic_read(&card->qdio.state) != QETH_QDIO_UNINITIALIZED) qeth_free_qdio_queues(card); - if (count == 1) + if (max == 1 && card->qdio.do_prio_queueing != QETH_PRIOQ_DEFAULT) dev_info(&card->gdev->dev, "Priority Queueing not supported\n"); - card->qdio.no_out_queues = count; + card->qdio.no_out_queues = max; return 0; } @@ -2654,7 +2658,7 @@ static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry( struct qeth_buffer_pool_entry, list); for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) { if (page_count(entry->elements[i]) > 1) { - struct page *page = alloc_page(GFP_ATOMIC); + struct page *page = dev_alloc_page(); if (!page) return NULL; @@ -3352,6 +3356,7 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, for (i = index; i < index + count; ++i) { unsigned int bidx = QDIO_BUFNR(i); + struct sk_buff *skb; buf = queue->bufs[bidx]; buf->buffer->element[buf->next_element_to_fill - 1].eflags |= @@ -3360,8 +3365,11 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, if (queue->bufstates) queue->bufstates[bidx].user = buf; - if (IS_IQD(queue->card)) + if (IS_IQD(card)) { + skb_queue_walk(&buf->skb_list, skb) + skb_tx_timestamp(skb); continue; + } if (!queue->do_pack) { if ((atomic_read(&queue->used_buffers) >= @@ -3705,6 +3713,7 @@ static int qeth_add_hw_header(struct qeth_qdio_out_q *queue, unsigned int hdr_len, unsigned int proto_len, unsigned int *elements) { + gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0); const unsigned int contiguous = proto_len ? proto_len : 1; const unsigned int max_elements = queue->max_elements; unsigned int __elements; @@ -3760,10 +3769,11 @@ check_layout: *hdr = skb_push(skb, hdr_len); return hdr_len; } - /* fall back */ + + /* Fall back to cache element with known-good alignment: */ if (hdr_len + proto_len > QETH_HDR_CACHE_OBJ_SIZE) return -E2BIG; - *hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); + *hdr = kmem_cache_alloc(qeth_core_header_cache, gfp); if (!*hdr) return -ENOMEM; /* Copy protocol headers behind HW header: */ @@ -5985,22 +5995,8 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) SET_NETDEV_DEV(dev, &card->gdev->dev); netif_carrier_off(dev); - if (IS_OSN(card)) { - dev->ethtool_ops = &qeth_osn_ethtool_ops; - } else { - dev->ethtool_ops = &qeth_ethtool_ops; - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->hw_features |= NETIF_F_SG; - dev->vlan_features |= NETIF_F_SG; - if (IS_IQD(card)) { - dev->features |= NETIF_F_SG; - if (netif_set_real_num_tx_queues(dev, - QETH_IQD_MIN_TXQ)) { - free_netdev(dev); - return NULL; - } - } - } + dev->ethtool_ops = IS_OSN(card) ? 
&qeth_osn_ethtool_ops : + &qeth_ethtool_ops; return dev; } @@ -6016,6 +6012,28 @@ struct net_device *qeth_clone_netdev(struct net_device *orig) return clone; } +int qeth_setup_netdev(struct qeth_card *card) +{ + struct net_device *dev = card->dev; + unsigned int num_tx_queues; + + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->hw_features |= NETIF_F_SG; + dev->vlan_features |= NETIF_F_SG; + + if (IS_IQD(card)) { + dev->features |= NETIF_F_SG; + num_tx_queues = QETH_IQD_MIN_TXQ; + } else if (IS_VM_NIC(card)) { + num_tx_queues = 1; + } else { + num_tx_queues = dev->real_num_tx_queues; + } + + return qeth_set_real_num_tx_queues(card, num_tx_queues); +} +EXPORT_SYMBOL_GPL(qeth_setup_netdev); + static int qeth_core_probe_device(struct ccwgroup_device *gdev) { struct qeth_card *card; @@ -6055,12 +6073,13 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) goto err_card; } + qeth_determine_capabilities(card); + qeth_set_blkt_defaults(card); + card->qdio.no_out_queues = card->dev->num_tx_queues; rc = qeth_update_from_chp_desc(card); if (rc) goto err_chp_desc; - qeth_determine_capabilities(card); - qeth_set_blkt_defaults(card); enforced_disc = qeth_enforce_discipline(card); switch (enforced_disc) { @@ -6245,9 +6264,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) struct mii_ioctl_data *mii_data; int rc = 0; - if (!card) - return -ENODEV; - switch (cmd) { case SIOC_QETH_ADP_SET_SNMP_CONTROL: rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data); @@ -6627,12 +6643,59 @@ void qeth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) } EXPORT_SYMBOL_GPL(qeth_get_stats64); +#define TC_IQD_UCAST 0 +static void qeth_iqd_set_prio_tc_map(struct net_device *dev, + unsigned int ucast_txqs) +{ + unsigned int prio; + + /* IQD requires mcast traffic to be placed on a dedicated queue, and + * qeth_iqd_select_queue() deals with this. + * For unicast traffic, we defer the queue selection to the stack. + * By installing a trivial prio map that spans over only the unicast + * queues, we can encourage the stack to spread the ucast traffic evenly + * without selecting the mcast queue. + */ + + /* One traffic class, spanning over all active ucast queues: */ + netdev_set_num_tc(dev, 1); + netdev_set_tc_queue(dev, TC_IQD_UCAST, ucast_txqs, + QETH_IQD_MIN_UCAST_TXQ); + + /* Map all priorities to this traffic class: */ + for (prio = 0; prio <= TC_BITMASK; prio++) + netdev_set_prio_tc_map(dev, prio, TC_IQD_UCAST); +} + +int qeth_set_real_num_tx_queues(struct qeth_card *card, unsigned int count) +{ + struct net_device *dev = card->dev; + int rc; + + /* Per netif_setup_tc(), adjust the mapping first: */ + if (IS_IQD(card)) + qeth_iqd_set_prio_tc_map(dev, count - 1); + + rc = netif_set_real_num_tx_queues(dev, count); + + if (rc && IS_IQD(card)) + qeth_iqd_set_prio_tc_map(dev, dev->real_num_tx_queues - 1); + + return rc; +} + u16 qeth_iqd_select_queue(struct net_device *dev, struct sk_buff *skb, u8 cast_type, struct net_device *sb_dev) { + u16 txq; + if (cast_type != RTN_UNICAST) return QETH_IQD_MCAST_TXQ; - return QETH_IQD_MIN_UCAST_TXQ; + if (dev->real_num_tx_queues == QETH_IQD_MIN_TXQ) + return QETH_IQD_MIN_UCAST_TXQ; + + txq = netdev_pick_tx(dev, skb, sb_dev); + return (txq == QETH_IQD_MCAST_TXQ) ? 
QETH_IQD_MIN_UCAST_TXQ : txq; } EXPORT_SYMBOL_GPL(qeth_iqd_select_queue); diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index 78cae61bc924..533a7f26dbe1 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -176,7 +176,7 @@ static ssize_t qeth_dev_prioqing_store(struct device *dev, struct qeth_card *card = dev_get_drvdata(dev); int rc = 0; - if (IS_IQD(card)) + if (IS_IQD(card) || IS_VM_NIC(card)) return -EOPNOTSUPP; mutex_lock(&card->conf_mutex); diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index 9052c72d5b8f..31e019085fc3 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -153,7 +153,6 @@ static void qeth_get_drvinfo(struct net_device *dev, strlcpy(info->driver, IS_LAYER2(card) ? "qeth_l2" : "qeth_l3", sizeof(info->driver)); - strlcpy(info->version, "1.0", sizeof(info->version)); strlcpy(info->fw_version, card->info.mcl_level, sizeof(info->fw_version)); snprintf(info->bus_info, sizeof(info->bus_info), "%s/%s/%s", @@ -175,6 +174,46 @@ static void qeth_get_channels(struct net_device *dev, channels->combined_count = 0; } +static int qeth_set_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qeth_card *card = dev->ml_priv; + + if (channels->rx_count == 0 || channels->tx_count == 0) + return -EINVAL; + if (channels->tx_count > card->qdio.no_out_queues) + return -EINVAL; + + if (IS_IQD(card)) { + if (channels->tx_count < QETH_IQD_MIN_TXQ) + return -EINVAL; + + /* Reject downgrade while running. It could push displaced + * ucast flows onto txq0, which is reserved for mcast. + */ + if (netif_running(dev) && + channels->tx_count < dev->real_num_tx_queues) + return -EPERM; + } else { + /* OSA still uses the legacy prio-queue mechanism: */ + if (!IS_VM_NIC(card)) + return -EOPNOTSUPP; + } + + return qeth_set_real_num_tx_queues(card, channels->tx_count); +} + +static int qeth_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct qeth_card *card = dev->ml_priv; + + if (!IS_IQD(card)) + return -EOPNOTSUPP; + + return ethtool_op_get_ts_info(dev, info); +} + static int qeth_get_tunable(struct net_device *dev, const struct ethtool_tunable *tuna, void *data) { @@ -410,6 +449,8 @@ const struct ethtool_ops qeth_ethtool_ops = { .get_sset_count = qeth_get_sset_count, .get_drvinfo = qeth_get_drvinfo, .get_channels = qeth_get_channels, + .set_channels = qeth_set_channels, + .get_ts_info = qeth_get_ts_info, .get_tunable = qeth_get_tunable, .set_tunable = qeth_set_tunable, .get_link_ksettings = qeth_get_link_ksettings, diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 4c8e93132e08..73cb363b1fab 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -499,6 +499,7 @@ static void qeth_l2_rx_mode_work(struct work_struct *work) static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue) { + gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0); struct qeth_hdr *hdr = (struct qeth_hdr *)skb->data; addr_t end = (addr_t)(skb->data + sizeof(*hdr)); addr_t start = (addr_t)skb->data; @@ -511,7 +512,7 @@ static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb, if (qeth_get_elements_for_range(start, end) > 1) { /* Misaligned HW header, move it to its own buffer element. 
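* Editor's note: objects from qeth_core_header_cache are allocated with an alignment that keeps them from crossing a page boundary, so the replacement header is guaranteed to occupy exactly one buffer element.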
*/ - hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); + hdr = kmem_cache_alloc(qeth_core_header_cache, gfp); if (!hdr) return -ENOMEM; hd_len = sizeof(*hdr); @@ -570,7 +571,9 @@ static u16 qeth_l2_select_queue(struct net_device *dev, struct sk_buff *skb, return qeth_iqd_select_queue(dev, skb, qeth_get_ether_cast_type(skb), sb_dev); - return qeth_get_priority_queue(card, skb); + + return IS_VM_NIC(card) ? netdev_pick_tx(dev, skb, sb_dev) : + qeth_get_priority_queue(card, skb); } static const struct device_type qeth_l2_devtype = { @@ -610,7 +613,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_set_offline(card, false); cancel_work_sync(&card->close_dev_work); - if (qeth_netdev_is_registered(card->dev)) + if (card->dev->reg_state == NETREG_REGISTERED) unregister_netdev(card->dev); } @@ -648,7 +651,7 @@ static const struct net_device_ops qeth_osn_netdev_ops = { .ndo_tx_timeout = qeth_tx_timeout, }; -static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) +static int qeth_l2_setup_netdev(struct qeth_card *card) { int rc; @@ -658,6 +661,10 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) goto add_napi; } + rc = qeth_setup_netdev(card); + if (rc) + return rc; + card->dev->needed_headroom = sizeof(struct qeth_hdr); card->dev->netdev_ops = &qeth_l2_netdev_ops; card->dev->priv_flags |= IFF_UNICAST_FLT; @@ -704,13 +711,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) add_napi: netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); - rc = register_netdev(card->dev); - if (!rc && carrier_ok) - netif_carrier_on(card->dev); - - if (rc) - card->dev->netdev_ops = NULL; - return rc; + return register_netdev(card->dev); } static void qeth_l2_trace_features(struct qeth_card *card) @@ -783,10 +784,13 @@ static int qeth_l2_set_online(struct qeth_card *card) qeth_set_allowed_threads(card, 0xffffffff, 0); - if (!qeth_netdev_is_registered(dev)) { - rc = qeth_l2_setup_netdev(card, carrier_ok); + if (dev->reg_state != NETREG_REGISTERED) { + rc = qeth_l2_setup_netdev(card); if (rc) goto out_remove; + + if (carrier_ok) + netif_carrier_on(dev); } else { rtnl_lock(); if (carrier_ok) @@ -1512,8 +1516,6 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable) struct ccw_device *ddev; struct subchannel_id schid; - if (!card) - return -EINVAL; if (!card->options.sbp.supported_funcs) return -EOPNOTSUPP; ddev = CARD_DDEV(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 8a803d6c9357..83ae75cf1389 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1880,7 +1880,8 @@ static u16 qeth_l3_osa_select_queue(struct net_device *dev, struct sk_buff *skb, { struct qeth_card *card = dev->ml_priv; - return qeth_get_priority_queue(card, skb); + return IS_VM_NIC(card) ? 
netdev_pick_tx(dev, skb, sb_dev) : + qeth_get_priority_queue(card, skb); } static const struct net_device_ops qeth_l3_netdev_ops = { @@ -1917,11 +1918,15 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_neigh_setup = qeth_l3_neigh_setup, }; -static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) +static int qeth_l3_setup_netdev(struct qeth_card *card) { unsigned int headroom; int rc; + rc = qeth_setup_netdev(card); + if (rc) + return rc; + if (IS_OSD(card) || IS_OSX(card)) { if ((card->info.link_type == QETH_LINK_TYPE_LANE_TR) || (card->info.link_type == QETH_LINK_TYPE_HSTR)) { @@ -1967,7 +1972,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) rc = qeth_l3_iqd_read_initial_mac(card); if (rc) - goto out; + return rc; } else return -ENODEV; @@ -1982,14 +1987,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) PAGE_SIZE * (QETH_MAX_BUFFER_ELEMENTS(card) - 1)); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); - rc = register_netdev(card->dev); - if (!rc && carrier_ok) - netif_carrier_on(card->dev); - -out: - if (rc) - card->dev->netdev_ops = NULL; - return rc; + return register_netdev(card->dev); } static const struct device_type qeth_l3_devtype = { @@ -2036,7 +2034,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_set_offline(card, false); cancel_work_sync(&card->close_dev_work); - if (qeth_netdev_is_registered(card->dev)) + if (card->dev->reg_state == NETREG_REGISTERED) unregister_netdev(card->dev); flush_workqueue(card->cmd_wq); @@ -2083,10 +2081,13 @@ static int qeth_l3_set_online(struct qeth_card *card) qeth_set_allowed_threads(card, 0xffffffff, 0); qeth_l3_recover_ip(card); - if (!qeth_netdev_is_registered(dev)) { - rc = qeth_l3_setup_netdev(card, carrier_ok); + if (dev->reg_state != NETREG_REGISTERED) { + rc = qeth_l3_setup_netdev(card); if (rc) goto out_remove; + + if (carrier_ok) + netif_carrier_on(dev); } else { rtnl_lock(); if (carrier_ok) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index be355f37337d..c1d379bf6ee1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -458,6 +458,8 @@ struct ethtool_ops { struct ethtool_stats *, u64 *); }; +int ethtool_check_ops(const struct ethtool_ops *ops); + struct ethtool_rx_flow_rule { struct flow_rule *rule; unsigned long priv[0]; diff --git a/include/linux/mdio.h b/include/linux/mdio.h index a7604248777b..917e4bb2ed71 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -316,11 +316,15 @@ static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, + u16 mask, u16 set); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, + u16 set); int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2bd920965bd3..cc55cee3b53c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -416,7 
+416,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 termination_table[0x1]; u8 reformat_and_fwd_to_table[0x1]; u8 reserved_at_1a[0x6]; - u8 reserved_at_20[0x2]; + u8 termination_table_raw_traffic[0x1]; + u8 reserved_at_21[0x1]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; u8 max_modify_header_actions[0x8]; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5448c8b443db..ab192720e2d6 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -98,7 +98,7 @@ struct ip_set_counter { struct ip_set_comment_rcu { struct rcu_head rcu; - char str[0]; + char str[]; }; struct ip_set_comment { diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1b261c51b3a3..5da88451853b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -264,7 +264,7 @@ struct xt_table_info { unsigned int stacksize; void ***jumpstack; - unsigned char entries[0] __aligned(8); + unsigned char entries[] __aligned(8); }; int xt_register_target(struct xt_target *target); @@ -464,7 +464,7 @@ struct compat_xt_entry_match { } kernel; u_int16_t match_size; } u; - unsigned char data[0]; + unsigned char data[]; }; struct compat_xt_entry_target { @@ -480,7 +480,7 @@ struct compat_xt_entry_target { } kernel; u_int16_t target_size; } u; - unsigned char data[0]; + unsigned char data[]; }; /* FIXME: this works only on 32 bit tasks @@ -494,7 +494,7 @@ struct compat_xt_counters { struct compat_xt_counters_info { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t num_counters; - struct compat_xt_counters counters[0]; + struct compat_xt_counters counters[]; }; struct _compat_xt_align { diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index e98028f00e47..7d3537c40ec9 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -67,7 +67,7 @@ struct compat_arpt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; static inline struct xt_entry_target * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 162f59d0d17a..2f5c4e6ecd8a 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -85,7 +85,7 @@ struct ebt_table_info { /* room to maintain the stack used for jumping from and into udc */ struct ebt_chainstack **chainstack; char *entries; - struct ebt_counter counters[0] ____cacheline_aligned; + struct ebt_counter counters[] ____cacheline_aligned; }; struct ebt_table { diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index e9e1ed74cdf1..b394bd4f68a3 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -76,7 +76,7 @@ struct compat_ipt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; /* Helper functions */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 78ab959c4575..8225f7821a29 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -43,7 +43,7 @@ struct compat_ip6t_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char 
elems[]; }; static inline struct xt_entry_target * diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b872aed8ba6..36d9dea04016 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -23,6 +23,7 @@ #include <linux/workqueue.h> #include <linux/mod_devicetable.h> #include <linux/u64_stats_sync.h> +#include <linux/irqreturn.h> #include <linux/atomic.h> @@ -568,7 +569,7 @@ struct phy_driver { int (*did_interrupt)(struct phy_device *phydev); /* Override default interrupt handling */ - int (*handle_interrupt)(struct phy_device *phydev); + irqreturn_t (*handle_interrupt)(struct phy_device *phydev); /* Clears up any memory if needed */ void (*remove)(struct phy_device *phydev); @@ -754,6 +755,25 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) } /** + * __phy_modify_changed() - Convenience function for modifying a PHY register + * @phydev: a pointer to a &struct phy_device + * @regnum: register number + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Unlocked helper function which allows a PHY register to be modified as + * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, + u16 mask, u16 set) +{ + return __mdiobus_modify_changed(phydev->mdio.bus, phydev->mdio.addr, + regnum, mask, set); +} + +/** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. * @phydev: The phy_device struct diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2180eb1aa254..8fa6df3b881b 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -317,4 +317,12 @@ int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); void phylink_set_port_modes(unsigned long *bits); void phylink_helper_basex_speed(struct phylink_link_state *state); +void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); +int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, + const struct phylink_link_state *state); +void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); + +void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); #endif diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9e531ec76274..4beb51009b62 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -562,8 +562,8 @@ static inline int sysfs_groups_change_owner(struct kobject *kobj, } static inline int sysfs_group_change_owner(struct kobject *kobj, - const struct attribute_group **groups, - kuid_t kuid, kgid_t kgid) + const struct attribute_group *groups, + kuid_t kuid, kgid_t kgid) { return 0; } diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index a71378007e61..c80539be1542 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -138,7 +138,7 @@ struct lowpan_dev { struct lowpan_iphc_ctx_table ctx; /* must be last */ - u8 priv[0] __aligned(sizeof(void *)); + u8 priv[] __aligned(sizeof(void *)); }; struct lowpan_802154_neigh { diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e42bb8e03c09..1576353a2773 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -121,6 +121,23 @@ struct bt_voice { #define BT_SNDMTU 12 #define BT_RCVMTU 13 +#define BT_PHY 14 + +#define BT_PHY_BR_1M_1SLOT 0x00000001 +#define BT_PHY_BR_1M_3SLOT 0x00000002 +#define BT_PHY_BR_1M_5SLOT 
0x00000004 +#define BT_PHY_EDR_2M_1SLOT 0x00000008 +#define BT_PHY_EDR_2M_3SLOT 0x00000010 +#define BT_PHY_EDR_2M_5SLOT 0x00000020 +#define BT_PHY_EDR_3M_1SLOT 0x00000040 +#define BT_PHY_EDR_3M_3SLOT 0x00000080 +#define BT_PHY_EDR_3M_5SLOT 0x00000100 +#define BT_PHY_LE_1M_TX 0x00000200 +#define BT_PHY_LE_1M_RX 0x00000400 +#define BT_PHY_LE_2M_TX 0x00000800 +#define BT_PHY_LE_2M_RX 0x00001000 +#define BT_PHY_LE_CODED_TX 0x00002000 +#define BT_PHY_LE_CODED_RX 0x00004000 __printf(1, 2) void bt_info(const char *fmt, ...); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6293bdd7d862..5f60e135aeb6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -115,7 +115,7 @@ enum { * wrongly configured local features that will require forcing * them to enable this mode. Getting RSSI information with the * inquiry responses is preferred since it allows for a better - * user expierence. + * user experience. * * This quirk must be set before hci_register_dev is called. */ @@ -142,7 +142,7 @@ enum { /* When this quirk is set, an external configuration step * is required and will be indicated with the controller - * configuation. + * configuration. * * This quirk can be set before hci_register_dev is called or * during the hdev->setup vendor callback. @@ -205,6 +205,15 @@ enum { * */ HCI_QUIRK_NON_PERSISTENT_SETUP, + + /* When this quirk is set, wideband speech is supported by + * the driver. Since no reliable mechanism exists to report + * this from the hardware, a driver flag is used to convey + * this support. + * + * This quirk must be set before hci_register_dev is called. + */ + HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, }; /* HCI device flags */ @@ -277,6 +286,7 @@ enum { HCI_FAST_CONNECTABLE, HCI_BREDR_ENABLED, HCI_LE_SCAN_INTERRUPTED, + HCI_WIDEBAND_SPEECH_ENABLED, HCI_DUT_MODE, HCI_VENDOR_DIAG, @@ -932,10 +942,14 @@ struct hci_cp_sniff_subrate { #define HCI_OP_RESET 0x0c03 #define HCI_OP_SET_EVENT_FLT 0x0c05 -struct hci_cp_set_event_flt { - __u8 flt_type; - __u8 cond_type; - __u8 condition[0]; +#define HCI_SET_EVENT_FLT_SIZE 9 +struct hci_cp_set_event_filter { + __u8 flt_type; + __u8 cond_type; + struct { + bdaddr_t bdaddr; + __u8 auto_accept; + } __packed addr_conn_flt; } __packed; /* Filter types */ @@ -949,8 +963,9 @@ struct hci_cp_set_event_flt { #define HCI_CONN_SETUP_ALLOW_BDADDR 0x02 /* CONN_SETUP Conditions */ -#define HCI_CONN_SETUP_AUTO_OFF 0x01 -#define HCI_CONN_SETUP_AUTO_ON 0x02 +#define HCI_CONN_SETUP_AUTO_OFF 0x01 +#define HCI_CONN_SETUP_AUTO_ON 0x02 +#define HCI_CONN_SETUP_AUTO_ON_WITH_RS 0x03 #define HCI_OP_READ_STORED_LINK_KEY 0x0c0d struct hci_cp_read_stored_link_key { @@ -1086,6 +1101,19 @@ struct hci_rp_read_inq_rsp_tx_power { __s8 tx_power; } __packed; +#define HCI_OP_READ_DEF_ERR_DATA_REPORTING 0x0c5a + #define ERR_DATA_REPORTING_DISABLED 0x00 + #define ERR_DATA_REPORTING_ENABLED 0x01 +struct hci_rp_read_def_err_data_reporting { + __u8 status; + __u8 err_data_reporting; +} __packed; + +#define HCI_OP_WRITE_DEF_ERR_DATA_REPORTING 0x0c5b +struct hci_cp_write_def_err_data_reporting { + __u8 err_data_reporting; +} __packed; + #define HCI_OP_SET_EVENT_MASK_PAGE_2 0x0c63 #define HCI_OP_READ_LOCATION_DATA 0x0c64 @@ -1335,7 +1363,7 @@ struct hci_rp_read_local_amp_assoc { __u8 status; __u8 phy_handle; __le16 rem_len; - __u8 frag[0]; + __u8 frag[]; } __packed; #define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b @@ -1343,7 +1371,7 @@ struct hci_cp_write_remote_amp_assoc { __u8 phy_handle; __le16 len_so_far; __le16 rem_len; - __u8 frag[0]; + __u8
frag[]; } __packed; struct hci_rp_write_remote_amp_assoc { __u8 status; @@ -1613,7 +1641,7 @@ struct hci_cp_le_set_ext_scan_params { __u8 own_addr_type; __u8 filter_policy; __u8 scanning_phys; - __u8 data[0]; + __u8 data[]; } __packed; #define LE_SCAN_PHY_1M 0x01 @@ -1641,7 +1669,7 @@ struct hci_cp_le_ext_create_conn { __u8 peer_addr_type; bdaddr_t peer_addr; __u8 phys; - __u8 data[0]; + __u8 data[]; } __packed; struct hci_cp_le_ext_conn_param { @@ -1693,7 +1721,7 @@ struct hci_rp_le_set_ext_adv_params { struct hci_cp_le_set_ext_adv_enable { __u8 enable; __u8 num_of_sets; - __u8 data[0]; + __u8 data[]; } __packed; struct hci_cp_ext_adv_set { @@ -1724,6 +1752,8 @@ struct hci_cp_le_set_ext_scan_rsp_data { #define LE_SET_ADV_DATA_NO_FRAG 0x01 +#define HCI_OP_LE_REMOVE_ADV_SET 0x203c + #define HCI_OP_LE_CLEAR_ADV_SETS 0x203d #define HCI_OP_LE_SET_ADV_SET_RAND_ADDR 0x2035 @@ -1775,14 +1805,14 @@ struct hci_cp_le_set_cig_params { __le16 m_latency; __le16 s_latency; __u8 num_cis; - struct hci_cis_params cis[0]; + struct hci_cis_params cis[]; } __packed; struct hci_rp_le_set_cig_params { __u8 status; __u8 cig_id; __u8 num_handles; - __le16 handle[0]; + __le16 handle[]; } __packed; #define HCI_OP_LE_CREATE_CIS 0x2064 @@ -1793,7 +1823,7 @@ struct hci_cis { struct hci_cp_le_create_cis { __u8 num_cis; - struct hci_cis cis[0]; + struct hci_cis cis[]; } __packed; #define HCI_OP_LE_REMOVE_CIG 0x2065 @@ -1937,7 +1967,7 @@ struct hci_comp_pkts_info { struct hci_ev_num_comp_pkts { __u8 num_hndl; - struct hci_comp_pkts_info handles[0]; + struct hci_comp_pkts_info handles[]; } __packed; #define HCI_EV_MODE_CHANGE 0x14 @@ -2170,7 +2200,7 @@ struct hci_comp_blocks_info { struct hci_ev_num_comp_blocks { __le16 num_blocks; __u8 num_hndl; - struct hci_comp_blocks_info handles[0]; + struct hci_comp_blocks_info handles[]; } __packed; #define HCI_EV_SYNC_TRAIN_COMPLETE 0x4F @@ -2226,7 +2256,7 @@ struct hci_ev_le_advertising_info { __u8 bdaddr_type; bdaddr_t bdaddr; __u8 length; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03 @@ -2302,7 +2332,7 @@ struct hci_ev_le_ext_adv_report { __u8 direct_addr_type; bdaddr_t direct_addr; __u8 length; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a @@ -2362,7 +2392,7 @@ struct hci_evt_le_cis_req { #define HCI_EV_STACK_INTERNAL 0xfd struct hci_ev_stack_internal { __u16 type; - __u8 data[0]; + __u8 data[]; } __packed; #define HCI_EV_SI_DEVICE 0x01 @@ -2409,7 +2439,7 @@ struct hci_sco_hdr { struct hci_iso_hdr { __le16 handle; __le16 dlen; - __u8 data[0]; + __u8 data[]; } __packed; /* ISO data packet status flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 89ecf0a80aa1..d4e28773d378 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -88,6 +88,31 @@ struct discovery_state { unsigned long scan_duration; }; +#define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ + +enum suspend_tasks { + SUSPEND_PAUSE_DISCOVERY, + SUSPEND_UNPAUSE_DISCOVERY, + + SUSPEND_PAUSE_ADVERTISING, + SUSPEND_UNPAUSE_ADVERTISING, + + SUSPEND_SCAN_DISABLE, + SUSPEND_SCAN_ENABLE, + SUSPEND_DISCONNECTING, + + SUSPEND_POWERING_DOWN, + + SUSPEND_PREPARE_NOTIFIER, + __SUSPEND_NUM_TASKS +}; + +enum suspended_state { + BT_RUNNING = 0, + BT_SUSPEND_DISCONNECT, + BT_SUSPEND_COMPLETE, +}; + struct hci_conn_hash { struct list_head list; unsigned int acl_num; @@ -260,6 +285,7 @@ struct hci_dev { __u8 stored_num_keys; __u8 io_capability; __s8 
inq_tx_power; + __u8 err_data_reporting; __u16 page_scan_interval; __u16 page_scan_window; __u8 page_scan_type; @@ -389,11 +415,28 @@ struct hci_dev { void *smp_bredr_data; struct discovery_state discovery; + + int discovery_old_state; + bool discovery_paused; + int advertising_old_state; + bool advertising_paused; + + struct notifier_block suspend_notifier; + struct work_struct suspend_prepare; + enum suspended_state suspend_state_next; + enum suspended_state suspend_state; + bool scanning_paused; + bool suspended; + + wait_queue_head_t suspend_wait_q; + DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); + struct hci_conn_hash conn_hash; struct list_head mgmt_pending; struct list_head blacklist; struct list_head whitelist; + struct list_head wakeable; struct list_head uuids; struct list_head link_keys; struct list_head long_term_keys; @@ -575,6 +618,7 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; + bool wakeable; }; extern struct list_head hci_dev_list; @@ -1477,6 +1521,8 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); +u32 hci_conn_get_phy(struct hci_conn *conn); + /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 8e9138acdae1..9352bb1bf34c 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -144,19 +144,19 @@ struct hci_dev_req { struct hci_dev_list_req { __u16 dev_num; - struct hci_dev_req dev_req[0]; /* hci_dev_req structures */ + struct hci_dev_req dev_req[]; /* hci_dev_req structures */ }; struct hci_conn_list_req { __u16 dev_id; __u16 conn_num; - struct hci_conn_info conn_info[0]; + struct hci_conn_info conn_info[]; }; struct hci_conn_info_req { bdaddr_t bdaddr; __u8 type; - struct hci_conn_info conn_info[0]; + struct hci_conn_info conn_info[]; }; struct hci_auth_info_req { diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 093aedebdf0c..537aaead259f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -119,6 +119,10 @@ struct l2cap_conninfo { #define L2CAP_LE_CONN_REQ 0x14 #define L2CAP_LE_CONN_RSP 0x15 #define L2CAP_LE_CREDITS 0x16 +#define L2CAP_ECRED_CONN_REQ 0x17 +#define L2CAP_ECRED_CONN_RSP 0x18 +#define L2CAP_ECRED_RECONF_REQ 0x19 +#define L2CAP_ECRED_RECONF_RSP 0x1a /* L2CAP extended feature mask */ #define L2CAP_FEAT_FLOWCTL 0x00000001 @@ -290,6 +294,8 @@ struct l2cap_conn_rsp { #define L2CAP_CR_LE_ENCRYPTION 0x0008 #define L2CAP_CR_LE_INVALID_SCID 0x0009 #define L2CAP_CR_LE_SCID_IN_USE 0X000A +#define L2CAP_CR_LE_UNACCEPT_PARAMS 0X000B +#define L2CAP_CR_LE_INVALID_PARAMS 0X000C /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -299,14 +305,14 @@ struct l2cap_conn_rsp { struct l2cap_conf_req { __le16 dcid; __le16 flags; - __u8 data[0]; + __u8 data[]; } __packed; struct l2cap_conf_rsp { __le16 scid; __le16 flags; __le16 result; - __u8 data[0]; + __u8 data[]; } __packed; #define L2CAP_CONF_SUCCESS 0x0000 @@ -322,7 +328,7 @@ struct l2cap_conf_rsp { struct l2cap_conf_opt { __u8 type; __u8 len; - __u8 val[0]; + __u8 val[]; } __packed; #define L2CAP_CONF_OPT_SIZE 2 @@ -359,6 +365,7 @@ struct l2cap_conf_rfc { * ever be used in the BR/EDR configuration phase. 
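* (Editor's note: L2CAP_MODE_EXT_FLOWCTL below identifies the enhanced credit based flow control mode that the new L2CAP_ECRED_* signalling commands added in this patch negotiate.)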
*/ #define L2CAP_MODE_LE_FLOWCTL 0x80 +#define L2CAP_MODE_EXT_FLOWCTL 0x81 struct l2cap_conf_efs { __u8 id; @@ -392,7 +399,7 @@ struct l2cap_info_req { struct l2cap_info_rsp { __le16 type; __le16 result; - __u8 data[0]; + __u8 data[]; } __packed; struct l2cap_create_chan_req { @@ -483,6 +490,39 @@ struct l2cap_le_credits { __le16 credits; } __packed; +#define L2CAP_ECRED_MIN_MTU 64 +#define L2CAP_ECRED_MIN_MPS 64 + +struct l2cap_ecred_conn_req { + __le16 psm; + __le16 mtu; + __le16 mps; + __le16 credits; + __le16 scid[0]; +} __packed; + +struct l2cap_ecred_conn_rsp { + __le16 mtu; + __le16 mps; + __le16 credits; + __le16 result; + __le16 dcid[0]; +}; + +struct l2cap_ecred_reconf_req { + __le16 mtu; + __le16 mps; + __le16 scid[0]; +} __packed; + +#define L2CAP_RECONF_SUCCESS 0x0000 +#define L2CAP_RECONF_INVALID_MTU 0x0001 +#define L2CAP_RECONF_INVALID_MPS 0x0002 + +struct l2cap_ecred_reconf_rsp { + __le16 result; +} __packed; + /* ----- L2CAP channels and connections ----- */ struct l2cap_seq_list { __u16 head; @@ -724,6 +764,7 @@ enum { FLAG_EFS_ENABLE, FLAG_DEFER_SETUP, FLAG_LE_CONN_REQ_SENT, + FLAG_ECRED_CONN_REQ_SENT, FLAG_PENDING_SECURITY, FLAG_HOLD_HCI_CONN, }; @@ -917,12 +958,14 @@ static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan) } extern bool disable_ertm; +extern bool enable_ecred; int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); bool l2cap_is_socket(struct socket *sock); void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan); +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan); void __l2cap_connect_rsp_defer(struct l2cap_chan *chan); int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm); @@ -932,6 +975,7 @@ struct l2cap_chan *l2cap_chan_create(void); void l2cap_chan_close(struct l2cap_chan *chan, int reason); int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst, u8 dst_type); +int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len); void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan, bool initiator); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index a90666af05bd..f41cd87550dc 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -101,7 +101,8 @@ struct mgmt_rp_read_index_list { #define MGMT_SETTING_PRIVACY 0x00002000 #define MGMT_SETTING_CONFIGURATION 0x00004000 #define MGMT_SETTING_STATIC_ADDRESS 0x00008000 -#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 +#define MGMT_SETTING_PHY_CONFIGURATION 0x00010000 +#define MGMT_SETTING_WIDEBAND_SPEECH 0x00020000 #define MGMT_OP_READ_INFO 0x0004 #define MGMT_READ_INFO_SIZE 0 @@ -671,6 +672,8 @@ struct mgmt_cp_set_blocked_keys { } __packed; #define MGMT_OP_SET_BLOCKED_KEYS_SIZE 2 +#define MGMT_OP_SET_WIDEBAND_SPEECH 0x0047 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index da4acefe39c8..99d26879b02a 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -34,7 +34,6 @@ #define RFCOMM_DEFAULT_MTU 127 #define RFCOMM_DEFAULT_CREDITS 7 -#define RFCOMM_MAX_L2CAP_MTU 1013 #define RFCOMM_MAX_CREDITS 40 #define RFCOMM_SKB_HEAD_RESERVE 8 @@ -356,7 +355,7 @@ struct rfcomm_dev_info { struct rfcomm_dev_list_req { u16 dev_num; - struct rfcomm_dev_info dev_info[0]; + struct rfcomm_dev_info dev_info[]; }; int 
rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index efd8d47f6997..51b9893d4ccb 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -137,6 +137,7 @@ enum flow_action_id { FLOW_ACTION_CSUM, FLOW_ACTION_MARK, FLOW_ACTION_PTYPE, + FLOW_ACTION_PRIORITY, FLOW_ACTION_WAKE, FLOW_ACTION_QUEUE, FLOW_ACTION_SAMPLE, @@ -163,19 +164,17 @@ enum flow_action_mangle_base { }; enum flow_action_hw_stats_type_bit { - FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE_BIT, - FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT, + FLOW_ACTION_HW_STATS_IMMEDIATE_BIT, + FLOW_ACTION_HW_STATS_DELAYED_BIT, }; enum flow_action_hw_stats_type { - FLOW_ACTION_HW_STATS_TYPE_DISABLED = 0, - FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE = - BIT(FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE_BIT), - FLOW_ACTION_HW_STATS_TYPE_DELAYED = - BIT(FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT), - FLOW_ACTION_HW_STATS_TYPE_ANY = - FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE | - FLOW_ACTION_HW_STATS_TYPE_DELAYED, + FLOW_ACTION_HW_STATS_DISABLED = 0, + FLOW_ACTION_HW_STATS_IMMEDIATE = + BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT), + FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT), + FLOW_ACTION_HW_STATS_ANY = FLOW_ACTION_HW_STATS_IMMEDIATE | + FLOW_ACTION_HW_STATS_DELAYED, }; typedef void (*action_destr)(void *priv); @@ -213,6 +212,7 @@ struct flow_action_entry { u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ u16 ptype; /* FLOW_ACTION_PTYPE */ + u32 priority; /* FLOW_ACTION_PRIORITY */ struct { /* FLOW_ACTION_QUEUE */ u32 ctx; u32 index; @@ -285,8 +285,8 @@ static inline bool flow_offload_has_one_action(const struct flow_action *action) __act = &(__actions)->entries[++__i]) static inline bool -flow_action_mixed_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack) +flow_action_mixed_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack) { const struct flow_action_entry *action_entry; u8 uninitialized_var(last_hw_stats_type); @@ -313,20 +313,20 @@ flow_action_first_entry_get(const struct flow_action *action) } static inline bool -__flow_action_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack, - bool check_allow_bit, - enum flow_action_hw_stats_type_bit allow_bit) +__flow_action_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack, + bool check_allow_bit, + enum flow_action_hw_stats_type_bit allow_bit) { const struct flow_action_entry *action_entry; if (!flow_action_has_entries(action)) return true; - if (!flow_action_mixed_hw_stats_types_check(action, extack)) + if (!flow_action_mixed_hw_stats_check(action, extack)) return false; action_entry = flow_action_first_entry_get(action); if (!check_allow_bit && - action_entry->hw_stats_type != FLOW_ACTION_HW_STATS_TYPE_ANY) { + action_entry->hw_stats_type != FLOW_ACTION_HW_STATS_ANY) { NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\""); return false; } else if (check_allow_bit && @@ -338,19 +338,18 @@ __flow_action_hw_stats_types_check(const struct flow_action *action, } static inline bool -flow_action_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack, - enum flow_action_hw_stats_type_bit allow_bit) +flow_action_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack, + enum flow_action_hw_stats_type_bit allow_bit) { - return __flow_action_hw_stats_types_check(action, extack, - 
true, allow_bit); + return __flow_action_hw_stats_check(action, extack, true, allow_bit); } static inline bool -flow_action_basic_hw_stats_types_check(const struct flow_action *action, - struct netlink_ext_ack *extack) +flow_action_basic_hw_stats_check(const struct flow_action *action, + struct netlink_ext_ack *extack) { - return __flow_action_hw_stats_types_check(action, extack, false, 0); + return __flow_action_hw_stats_check(action, extack, false, 0); } struct flow_rule { diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 5ae5295aa46d..e1e588387103 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -45,7 +45,7 @@ enum nf_ct_ext_id { struct nf_ct_ext { u8 offset[NF_CT_EXT_NUM]; u8 len; - char data[0]; + char data[]; }; static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 6dd72396f534..659b0ea25b4d 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -14,7 +14,7 @@ struct nf_ct_timeout { __u16 l3num; const struct nf_conntrack_l4proto *l4proto; - char data[0]; + char data[]; }; struct ctnl_timeout { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 6890f1ca3e31..f523ea87b6ae 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -19,11 +19,17 @@ enum flow_offload_tuple_dir; struct nf_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; + struct flow_dissector_key_control enc_control; struct flow_dissector_key_basic basic; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; }; + struct flow_dissector_key_keyid enc_key_id; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; struct flow_dissector_key_tcp tcp; struct flow_dissector_key_ports tp; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
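The __aligned() also rounds sizeof(struct nf_flow_key) up to a multiple of sizeof(long), since a structure's size is always a multiple of its alignment, so the key can be hashed and compared one long at a time.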
*/ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4170c033d461..5d80e09f8148 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -224,7 +224,7 @@ int nft_validate_register_store(const struct nft_ctx *ctx, */ struct nft_userdata { u8 len; - unsigned char data[0]; + unsigned char data[]; }; /** @@ -385,21 +385,14 @@ struct nft_set_ops { * struct nft_set_type - nf_tables set type * * @ops: set ops for this type - * @list: used internally - * @owner: module reference * @features: features supported by the implementation */ struct nft_set_type { const struct nft_set_ops ops; - struct list_head list; - struct module *owner; u32 features; }; #define to_set_type(o) container_of(o, struct nft_set_type, ops) -int nft_register_set(struct nft_set_type *type); -void nft_unregister_set(struct nft_set_type *type); - /** * struct nft_set - nf_tables set instance * @@ -572,7 +565,7 @@ struct nft_set_ext_tmpl { struct nft_set_ext { u8 genmask; u8 offset[NFT_SET_EXT_NUM]; - char data[0]; + char data[]; }; static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) @@ -673,6 +666,10 @@ static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_OBJREF); } +struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nlattr *attr); + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, const u32 *data, @@ -849,8 +846,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) return (void *)expr->data; } -struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, - const struct nlattr *nla); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); @@ -895,6 +890,18 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) return (void *)&rule->data[rule->dlen]; } +static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_expr *expr; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) { + expr = nft_set_ext_expr(ext); + expr->ops->eval(expr, regs, pkt); + } +} + /* * The last pointer isn't really necessary, but the compiler isn't able to * determine that the result of nft_expr_last() is always the same since it @@ -1253,9 +1260,6 @@ void nft_trace_notify(struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) -#define MODULE_ALIAS_NFT_SET() \ - MODULE_ALIAS("nft-set") - #define MODULE_ALIAS_NFT_OBJ(type) \ MODULE_ALIAS("nft-obj-" __stringify(type)) @@ -1385,7 +1389,7 @@ struct nft_trans { int msg_type; bool put_net; struct nft_ctx ctx; - char data[0]; + char data[]; }; struct nft_trans_rule { diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 29e7e1021267..78516de14d31 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -69,12 +69,13 @@ extern const struct nft_expr_ops nft_payload_fast_ops; extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; -extern struct nft_set_type nft_set_rhash_type; -extern struct nft_set_type nft_set_hash_type; -extern struct nft_set_type nft_set_hash_fast_type; -extern struct nft_set_type nft_set_rbtree_type; -extern 
struct nft_set_type nft_set_bitmap_type; -extern struct nft_set_type nft_set_pipapo_type; +extern const struct nft_set_type nft_set_rhash_type; +extern const struct nft_set_type nft_set_hash_type; +extern const struct nft_set_type nft_set_hash_fast_type; +extern const struct nft_set_type nft_set_rbtree_type; +extern const struct nft_set_type nft_set_bitmap_type; +extern const struct nft_set_type nft_set_pipapo_type; +extern const struct nft_set_type nft_set_pipapo_avx2_type; struct nft_expr; struct nft_regs; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 20d2c6419612..9092e697059e 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -75,7 +75,15 @@ struct qdisc_watchdog { void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid); void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires); + +void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, + u64 delta_ns); + +static inline void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, + u64 expires) +{ + return qdisc_watchdog_schedule_range_ns(wd, expires, 0ULL); +} static inline void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index b22a1f641f02..00bfee70609e 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -27,8 +27,8 @@ struct tcf_skbedit { }; #define to_skbedit(a) ((struct tcf_skbedit *)a) -/* Return true iff action is mark */ -static inline bool is_tcf_skbedit_mark(const struct tc_action *a) +/* Return true iff action is the one identified by FLAG. */ +static inline bool is_tcf_skbedit_with_flag(const struct tc_action *a, u32 flag) { #ifdef CONFIG_NET_CLS_ACT u32 flags; @@ -37,12 +37,18 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a) rcu_read_lock(); flags = rcu_dereference(to_skbedit(a)->params)->flags; rcu_read_unlock(); - return flags == SKBEDIT_F_MARK; + return flags == flag; } #endif return false; } +/* Return true iff action is mark */ +static inline bool is_tcf_skbedit_mark(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_MARK); +} + static inline u32 tcf_skbedit_mark(const struct tc_action *a) { u32 mark; @@ -57,17 +63,7 @@ static inline u32 tcf_skbedit_mark(const struct tc_action *a) /* Return true iff action is ptype */ static inline bool is_tcf_skbedit_ptype(const struct tc_action *a) { -#ifdef CONFIG_NET_CLS_ACT - u32 flags; - - if (a->ops && a->ops->id == TCA_ID_SKBEDIT) { - rcu_read_lock(); - flags = rcu_dereference(to_skbedit(a)->params)->flags; - rcu_read_unlock(); - return flags == SKBEDIT_F_PTYPE; - } -#endif - return false; + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PTYPE); } static inline u32 tcf_skbedit_ptype(const struct tc_action *a) @@ -81,4 +77,21 @@ static inline u32 tcf_skbedit_ptype(const struct tc_action *a) return ptype; } +/* Return true iff action is priority */ +static inline bool is_tcf_skbedit_priority(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_PRIORITY); +} + +static inline u32 tcf_skbedit_priority(const struct tc_action *a) +{ + u32 priority; + + rcu_read_lock(); + priority = rcu_dereference(to_skbedit(a)->params)->priority; + rcu_read_unlock(); + + return priority; +} + #endif /* __NET_TC_SKBEDIT_H */ diff --git a/include/net/tc_act/tc_tunnel_key.h 
b/include/net/tc_act/tc_tunnel_key.h index 2b3df076e5b6..e1057b255f69 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -28,8 +28,10 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); - struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + struct tcf_tunnel_key_params *params; + params = rcu_dereference_protected(t->params, + lockdep_is_held(&a->tcfa_lock)); if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; #endif @@ -40,8 +42,10 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); - struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + struct tcf_tunnel_key_params *params; + params = rcu_dereference_protected(t->params, + lockdep_is_held(&a->tcfa_lock)); if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; #endif diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 42f7ca38ad80..bfe621ea51b3 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -174,6 +174,16 @@ struct br_vlan_msg { __u32 ifindex; }; +enum { + BRIDGE_VLANDB_DUMP_UNSPEC, + BRIDGE_VLANDB_DUMP_FLAGS, + __BRIDGE_VLANDB_DUMP_MAX, +}; +#define BRIDGE_VLANDB_DUMP_MAX (__BRIDGE_VLANDB_DUMP_MAX - 1) + +/* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ +#define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ + /* Bridge vlan RTM attributes * [BRIDGE_VLANDB_ENTRY] = { * [BRIDGE_VLANDB_ENTRY_INFO] @@ -192,10 +202,46 @@ enum { BRIDGE_VLANDB_ENTRY_INFO, BRIDGE_VLANDB_ENTRY_RANGE, BRIDGE_VLANDB_ENTRY_STATE, + BRIDGE_VLANDB_ENTRY_TUNNEL_INFO, + BRIDGE_VLANDB_ENTRY_STATS, __BRIDGE_VLANDB_ENTRY_MAX, }; #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { + * [BRIDGE_VLANDB_TINFO_ID] + * ... + * } + * } + */ +enum { + BRIDGE_VLANDB_TINFO_UNSPEC, + BRIDGE_VLANDB_TINFO_ID, + BRIDGE_VLANDB_TINFO_CMD, + __BRIDGE_VLANDB_TINFO_MAX, +}; +#define BRIDGE_VLANDB_TINFO_MAX (__BRIDGE_VLANDB_TINFO_MAX - 1) + +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_STATS] = { + * [BRIDGE_VLANDB_STATS_RX_BYTES] + * ... + * } + * ... 
+ * } + */ +enum { + BRIDGE_VLANDB_STATS_UNSPEC, + BRIDGE_VLANDB_STATS_RX_BYTES, + BRIDGE_VLANDB_STATS_RX_PACKETS, + BRIDGE_VLANDB_STATS_TX_BYTES, + BRIDGE_VLANDB_STATS_TX_PACKETS, + BRIDGE_VLANDB_STATS_PAD, + __BRIDGE_VLANDB_STATS_MAX, +}; +#define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h index 0b9c3beda345..90f9b4e1ba27 100644 --- a/include/uapi/linux/mii.h +++ b/include/uapi/linux/mii.h @@ -134,11 +134,16 @@ /* MAC and PHY tx_config_Reg[15:0] for SGMII in-band auto-negotiation.*/ #define ADVERTISE_SGMII 0x0001 /* MAC can do SGMII */ #define LPA_SGMII 0x0001 /* PHY can do SGMII */ +#define LPA_SGMII_SPD_MASK 0x0c00 /* SGMII speed mask */ +#define LPA_SGMII_FULL_DUPLEX 0x1000 /* SGMII full duplex */ #define LPA_SGMII_DPX_SPD_MASK 0x1C00 /* SGMII duplex and speed bits */ +#define LPA_SGMII_10 0x0000 /* 10Mbps */ #define LPA_SGMII_10HALF 0x0000 /* Can do 10mbps half-duplex */ #define LPA_SGMII_10FULL 0x1000 /* Can do 10mbps full-duplex */ +#define LPA_SGMII_100 0x0400 /* 100Mbps */ #define LPA_SGMII_100HALF 0x0400 /* Can do 100mbps half-duplex */ #define LPA_SGMII_100FULL 0x1400 /* Can do 100mbps full-duplex */ +#define LPA_SGMII_1000 0x0800 /* 1000Mbps */ #define LPA_SGMII_1000HALF 0x0800 /* Can do 1000mbps half-duplex */ #define LPA_SGMII_1000FULL 0x1800 /* Can do 1000mbps full-duplex */ #define LPA_SGMII_LINK 0x8000 /* PHY link with copper-side partner */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 065218a20bb7..9c3d2d04d6a1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1770,6 +1770,7 @@ enum nft_tunnel_opts_attributes { NFTA_TUNNEL_KEY_OPTS_UNSPEC, NFTA_TUNNEL_KEY_OPTS_VXLAN, NFTA_TUNNEL_KEY_OPTS_ERSPAN, + NFTA_TUNNEL_KEY_OPTS_GENEVE, __NFTA_TUNNEL_KEY_OPTS_MAX }; #define NFTA_TUNNEL_KEY_OPTS_MAX (__NFTA_TUNNEL_KEY_OPTS_MAX - 1) @@ -1791,6 +1792,15 @@ enum nft_tunnel_opts_erspan_attributes { }; #define NFTA_TUNNEL_KEY_ERSPAN_MAX (__NFTA_TUNNEL_KEY_ERSPAN_MAX - 1) +enum nft_tunnel_opts_geneve_attributes { + NFTA_TUNNEL_KEY_GENEVE_UNSPEC, + NFTA_TUNNEL_KEY_GENEVE_CLASS, + NFTA_TUNNEL_KEY_GENEVE_TYPE, + NFTA_TUNNEL_KEY_GENEVE_DATA, + __NFTA_TUNNEL_KEY_GENEVE_MAX +}; +#define NFTA_TUNNEL_KEY_GENEVE_MAX (__NFTA_TUNNEL_KEY_GENEVE_MAX - 1) + enum nft_tunnel_flags { NFT_TUNNEL_F_ZERO_CSUM_TX = (1 << 0), NFT_TUNNEL_F_DONT_FRAGMENT = (1 << 1), diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 3c586a19baea..434e6506abaa 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h @@ -1,4 +1,3 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * linux/include/linux/netfilter/xt_IDLETIMER.h * @@ -33,6 +32,7 @@ #include <linux/types.h> #define MAX_IDLETIMER_LABEL_SIZE 28 +#define XT_IDLETIMER_ALARM 0x01 struct idletimer_tg_info { __u32 timeout; @@ -43,4 +43,14 @@ struct idletimer_tg_info { struct idletimer_tg *timer __attribute__((aligned(8))); }; +struct idletimer_tg_info_v1 { + __u32 timeout; + + char label[MAX_IDLETIMER_LABEL_SIZE]; + + __u8 timer_type; + + /* for kernel module internal use only */ + struct idletimer_tg *timer __attribute__((aligned(8))); +}; #endif diff --git a/include/uapi/linux/netfilter_bridge/ebt_among.h b/include/uapi/linux/netfilter_bridge/ebt_among.h index 
9acf757bc1f7..73b26a280c4f 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_among.h +++ b/include/uapi/linux/netfilter_bridge/ebt_among.h @@ -40,7 +40,7 @@ struct ebt_mac_wormhash_tuple { struct ebt_mac_wormhash { int table[257]; int poolsize; - struct ebt_mac_wormhash_tuple pool[0]; + struct ebt_mac_wormhash_tuple pool[]; }; #define ebt_mac_wormhash_size(x) ((x) ? sizeof(struct ebt_mac_wormhash) \ diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index ea39287d59c8..7307a29a103e 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -911,6 +911,8 @@ enum { TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + TCA_FQ_TIMER_SLACK, /* timer slack */ + __TCA_FQ_MAX }; diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h index 0029d5119be6..2fd253a61a2a 100644 --- a/net/bluetooth/a2mp.h +++ b/net/bluetooth/a2mp.h @@ -36,14 +36,14 @@ struct a2mp_cmd { __u8 code; __u8 ident; __le16 len; - __u8 data[0]; + __u8 data[]; } __packed; /* A2MP command codes */ #define A2MP_COMMAND_REJ 0x01 struct a2mp_cmd_rej { __le16 reason; - __u8 data[0]; + __u8 data[]; } __packed; #define A2MP_DISCOVER_REQ 0x02 @@ -62,7 +62,7 @@ struct a2mp_cl { struct a2mp_discov_rsp { __le16 mtu; __le16 ext_feat; - struct a2mp_cl cl[0]; + struct a2mp_cl cl[]; } __packed; #define A2MP_CHANGE_NOTIFY 0x04 @@ -93,7 +93,7 @@ struct a2mp_amp_assoc_req { struct a2mp_amp_assoc_rsp { __u8 id; __u8 status; - __u8 amp_assoc[0]; + __u8 amp_assoc[]; } __packed; #define A2MP_CREATEPHYSLINK_REQ 0x0A @@ -101,7 +101,7 @@ struct a2mp_amp_assoc_rsp { struct a2mp_physlink_req { __u8 local_id; __u8 remote_id; - __u8 amp_assoc[0]; + __u8 amp_assoc[]; } __packed; #define A2MP_CREATEPHYSLINK_RSP 0x0B diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 24f18b133959..9680473ed7ef 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -74,14 +74,14 @@ struct bnep_setup_conn_req { __u8 type; __u8 ctrl; __u8 uuid_size; - __u8 service[0]; + __u8 service[]; } __packed; struct bnep_set_filter_req { __u8 type; __u8 ctrl; __be16 len; - __u8 list[0]; + __u8 list[]; } __packed; struct bnep_control_rsp { @@ -93,7 +93,7 @@ struct bnep_control_rsp { struct bnep_ext_hdr { __u8 type; __u8 len; - __u8 data[0]; + __u8 data[]; } __packed; /* BNEP ioctl defines */ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 87691404d0c6..e245bc155cc2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -467,6 +467,23 @@ static void hci_conn_auto_accept(struct work_struct *work) &conn->dst); } +static void le_disable_advertising(struct hci_dev *hdev) +{ + if (ext_adv_capable(hdev)) { + struct hci_cp_le_set_ext_adv_enable cp; + + cp.enable = 0x00; + cp.num_of_sets = 0x00; + + hci_send_cmd(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(cp), + &cp); + } else { + u8 enable = 0x00; + hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), + &enable); + } +} + static void le_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, @@ -481,9 +498,8 @@ static void le_conn_timeout(struct work_struct *work) * (which doesn't have a timeout of its own). 
*/ if (conn->role == HCI_ROLE_SLAVE) { - u8 enable = 0x00; - hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), - &enable); + /* Disable LE Advertising */ + le_disable_advertising(hdev); hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); return; } @@ -898,6 +914,16 @@ static void hci_req_directed_advertising(struct hci_request *req, cp.peer_addr_type = conn->dst_type; bacpy(&cp.peer_addr, &conn->dst); + /* As per Core Spec 5.2 Vol 2, Part E, Sec 7.8.53, the + * advertising_event_property LE_LEGACY_ADV_DIRECT_IND + * does not support advertising data when the advertising set already + * contains some; the controller shall return the error code 'Invalid + * HCI Command Parameters (0x12)'. + * So the adv set for handle 0x00 has to be removed, since we use + * instance 0 for directed adv. + */ + hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(cp.handle), &cp.handle); + hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp); if (own_addr_type == ADDR_LE_DEV_RANDOM && @@ -1029,11 +1055,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * anyway have to disable it in order to start directed * advertising. */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - u8 enable = 0x00; - hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), - &enable); - } + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(&req); /* If requested to connect as slave use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { @@ -1725,3 +1748,110 @@ struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle) return hchan; } + +u32 hci_conn_get_phy(struct hci_conn *conn) +{ + u32 phys = 0; + + hci_dev_lock(conn->hdev); + + /* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471: + * Table 6.2: Packets defined for synchronous, asynchronous, and + * CSB logical transport types. + */ + switch (conn->type) { + case SCO_LINK: + /* SCO logical transport (1 Mb/s): + * HV1, HV2, HV3 and DV. + */ + phys |= BT_PHY_BR_1M_1SLOT; + + break; + + case ACL_LINK: + /* ACL logical transport (1 Mb/s) ptt=0: + * DH1, DM3, DH3, DM5 and DH5. + */ + phys |= BT_PHY_BR_1M_1SLOT; + + if (conn->pkt_type & (HCI_DM3 | HCI_DH3)) + phys |= BT_PHY_BR_1M_3SLOT; + + if (conn->pkt_type & (HCI_DM5 | HCI_DH5)) + phys |= BT_PHY_BR_1M_5SLOT; + + /* ACL logical transport (2 Mb/s) ptt=1: + * 2-DH1, 2-DH3 and 2-DH5. + */ + if (!(conn->pkt_type & HCI_2DH1)) + phys |= BT_PHY_EDR_2M_1SLOT; + + if (!(conn->pkt_type & HCI_2DH3)) + phys |= BT_PHY_EDR_2M_3SLOT; + + if (!(conn->pkt_type & HCI_2DH5)) + phys |= BT_PHY_EDR_2M_5SLOT; + + /* ACL logical transport (3 Mb/s) ptt=1: + * 3-DH1, 3-DH3 and 3-DH5.
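 + * Like the 2 Mb/s checks above, these bits are tested inverted + * because the EDR packet-type bits are "shall not use" flags, + * unlike the Basic Rate DM/DH bits.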
+ */ + if (!(conn->pkt_type & HCI_3DH1)) + phys |= BT_PHY_EDR_3M_1SLOT; + + if (!(conn->pkt_type & HCI_3DH3)) + phys |= BT_PHY_EDR_3M_3SLOT; + + if (!(conn->pkt_type & HCI_3DH5)) + phys |= BT_PHY_EDR_3M_5SLOT; + + break; + + case ESCO_LINK: + /* eSCO logical transport (1 Mb/s): EV3, EV4 and EV5 */ + phys |= BT_PHY_BR_1M_1SLOT; + + if (!(conn->pkt_type & (ESCO_EV4 | ESCO_EV5))) + phys |= BT_PHY_BR_1M_3SLOT; + + /* eSCO logical transport (2 Mb/s): 2-EV3, 2-EV5 */ + if (!(conn->pkt_type & ESCO_2EV3)) + phys |= BT_PHY_EDR_2M_1SLOT; + + if (!(conn->pkt_type & ESCO_2EV5)) + phys |= BT_PHY_EDR_2M_3SLOT; + + /* eSCO logical transport (3 Mb/s): 3-EV3, 3-EV5 */ + if (!(conn->pkt_type & ESCO_3EV3)) + phys |= BT_PHY_EDR_3M_1SLOT; + + if (!(conn->pkt_type & ESCO_3EV5)) + phys |= BT_PHY_EDR_3M_3SLOT; + + break; + + case LE_LINK: + if (conn->le_tx_phy & HCI_LE_SET_PHY_1M) + phys |= BT_PHY_LE_1M_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_1M) + phys |= BT_PHY_LE_1M_RX; + + if (conn->le_tx_phy & HCI_LE_SET_PHY_2M) + phys |= BT_PHY_LE_2M_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_2M) + phys |= BT_PHY_LE_2M_RX; + + if (conn->le_tx_phy & HCI_LE_SET_PHY_CODED) + phys |= BT_PHY_LE_CODED_TX; + + if (conn->le_rx_phy & HCI_LE_SET_PHY_CODED) + phys |= BT_PHY_LE_CODED_RX; + + break; + } + + hci_dev_unlock(conn->hdev); + + return phys; +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cbbc34a006d1..dbd2ad3a26ed 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -31,6 +31,8 @@ #include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/property.h> +#include <linux/suspend.h> +#include <linux/wait.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -603,6 +605,9 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[8] & 0x01) hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); + if (hdev->commands[18] & 0x04) + hci_req_add(req, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL); + /* Some older Broadcom based Bluetooth 1.2 controllers do not * support the Read Page Scan Type command. Check support for * this command in the bit mask of supported commands. @@ -838,6 +843,26 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) sizeof(support), &support); } + /* Set erroneous data reporting if supported to the wideband speech + * setting value + */ + if (hdev->commands[18] & 0x08) { + bool enabled = hci_dev_test_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + + if (enabled != + (hdev->err_data_reporting == ERR_DATA_REPORTING_ENABLED)) { + struct hci_cp_write_def_err_data_reporting cp; + + cp.err_data_reporting = enabled ? + ERR_DATA_REPORTING_ENABLED : + ERR_DATA_REPORTING_DISABLED; + + hci_req_add(req, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING, + sizeof(cp), &cp); + } + } + /* Set Suggested Default Data Length to maximum if supported */ if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { struct hci_cp_le_write_def_data_len cp; @@ -1764,6 +1789,9 @@ int hci_dev_do_close(struct hci_dev *hdev) clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); + if (test_and_clear_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks)) + wake_up(&hdev->suspend_wait_q); + /* After this point our queues are empty * and no tasks are scheduled. 
*/ hdev->close(hdev); @@ -2285,7 +2313,7 @@ void hci_link_keys_clear(struct hci_dev *hdev) { struct link_key *key; - list_for_each_entry_rcu(key, &hdev->link_keys, list) { + list_for_each_entry(key, &hdev->link_keys, list) { list_del_rcu(&key->list); kfree_rcu(key, rcu); } @@ -2295,7 +2323,7 @@ void hci_smp_ltks_clear(struct hci_dev *hdev) { struct smp_ltk *k; - list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + list_for_each_entry(k, &hdev->long_term_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } @@ -2305,7 +2333,7 @@ void hci_smp_irks_clear(struct hci_dev *hdev) { struct smp_irk *k; - list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) { + list_for_each_entry(k, &hdev->identity_resolving_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } @@ -2315,7 +2343,7 @@ void hci_blocked_keys_clear(struct hci_dev *hdev) { struct blocked_key *b; - list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { + list_for_each_entry(b, &hdev->blocked_keys, list) { list_del_rcu(&b->list); kfree_rcu(b, rcu); } @@ -2327,7 +2355,7 @@ bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]) struct blocked_key *b; rcu_read_lock(); - list_for_each_entry(b, &hdev->blocked_keys, list) { + list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { if (b->type == type && !memcmp(b->val, val, sizeof(b->val))) { blocked = true; break; @@ -3241,6 +3269,93 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, } } +static int hci_suspend_wait_event(struct hci_dev *hdev) +{ +#define WAKE_COND \ + (find_first_bit(hdev->suspend_tasks, __SUSPEND_NUM_TASKS) == \ + __SUSPEND_NUM_TASKS) + + int i; + int ret = wait_event_timeout(hdev->suspend_wait_q, + WAKE_COND, SUSPEND_NOTIFIER_TIMEOUT); + + if (ret == 0) { + bt_dev_dbg(hdev, "Timed out waiting for suspend"); + for (i = 0; i < __SUSPEND_NUM_TASKS; ++i) { + if (test_bit(i, hdev->suspend_tasks)) + bt_dev_dbg(hdev, "Bit %d is set", i); + clear_bit(i, hdev->suspend_tasks); + } + + ret = -ETIMEDOUT; + } else { + ret = 0; + } + + return ret; +} + +static void hci_prepare_suspend(struct work_struct *work) +{ + struct hci_dev *hdev = + container_of(work, struct hci_dev, suspend_prepare); + + hci_dev_lock(hdev); + hci_req_prepare_suspend(hdev, hdev->suspend_state_next); + hci_dev_unlock(hdev); +} + +static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct hci_dev *hdev = + container_of(nb, struct hci_dev, suspend_notifier); + int ret = 0; + + /* If powering down, wait for completion. */ + if (mgmt_powering_down(hdev)) { + set_bit(SUSPEND_POWERING_DOWN, hdev->suspend_tasks); + ret = hci_suspend_wait_event(hdev); + if (ret) + goto done; + } + + /* Suspend notifier should only act on events when powered. */ + if (!hdev_is_powered(hdev)) + goto done; + + if (action == PM_SUSPEND_PREPARE) { + /* Suspend consists of two actions: + * - First, disconnect everything and make the controller not + * connectable (disabling scanning) + * - Second, program event filter/whitelist and enable scan + */ + hdev->suspend_state_next = BT_SUSPEND_DISCONNECT; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + ret = hci_suspend_wait_event(hdev); + + /* If the disconnect portion failed, don't attempt to complete + * by configuring the whitelist. The suspend notifier will + * follow a cancelled suspend with a PM_POST_SUSPEND + * notification. 
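 + * That notification is handled below by setting the next state + * back to BT_RUNNING and undoing the partial preparation.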
+ */ + if (!ret) { + hdev->suspend_state_next = BT_SUSPEND_COMPLETE; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + ret = hci_suspend_wait_event(hdev); + } + } else if (action == PM_POST_SUSPEND) { + hdev->suspend_state_next = BT_RUNNING; + set_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + queue_work(hdev->req_workqueue, &hdev->suspend_prepare); + ret = hci_suspend_wait_event(hdev); + } + +done: + return ret ? notifier_from_errno(-EBUSY) : NOTIFY_STOP; +} /* Alloc HCI device */ struct hci_dev *hci_alloc_dev(void) { @@ -3299,6 +3414,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->blacklist); INIT_LIST_HEAD(&hdev->whitelist); + INIT_LIST_HEAD(&hdev->wakeable); INIT_LIST_HEAD(&hdev->uuids); INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); @@ -3318,6 +3434,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->error_reset, hci_error_reset); + INIT_WORK(&hdev->suspend_prepare, hci_prepare_suspend); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); @@ -3326,6 +3443,7 @@ struct hci_dev *hci_alloc_dev(void) skb_queue_head_init(&hdev->raw_q); init_waitqueue_head(&hdev->req_wait_q); + init_waitqueue_head(&hdev->suspend_wait_q); INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); @@ -3437,6 +3555,11 @@ int hci_register_dev(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); + hdev->suspend_notifier.notifier_call = hci_suspend_notifier; + error = register_pm_notifier(&hdev->suspend_notifier); + if (error) + goto err_wqueue; + queue_work(hdev->req_workqueue, &hdev->power_on); return id; @@ -3470,6 +3593,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); + unregister_pm_notifier(&hdev->suspend_notifier); + if (!test_bit(HCI_INIT, &hdev->flags) && !hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { @@ -4387,13 +4512,16 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_sco_hdr *hdr = (void *) skb->data; struct hci_conn *conn; - __u16 handle; + __u16 handle, flags; skb_pull(skb, HCI_SCO_HDR_SIZE); handle = __le16_to_cpu(hdr->handle); + flags = hci_flags(handle); + handle = hci_handle(handle); - BT_DBG("%s len %d handle 0x%4.4x", hdev->name, skb->len, handle); + BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len, + handle, flags); hdev->stat.sco_rx++; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6ddc4a74a5e4..20408d386268 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -901,6 +901,37 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, hdev->inq_tx_power = rp->tx_power; } +static void hci_cc_read_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_def_err_data_reporting *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->err_data_reporting = rp->err_data_reporting; +} + +static void hci_cc_write_def_err_data_reporting(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *)skb->data); + struct hci_cp_write_def_err_data_reporting *cp; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING); + if (!cp) + return; + + hdev->err_data_reporting = 
cp->err_data_reporting; +} + static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_pin_code_reply *rp = (void *) skb->data; @@ -2202,10 +2233,22 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) + if (conn) { + u8 type = conn->type; + mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); + /* If the disconnection failed for any reason, the upper layer + * does not retry the disconnection in the current implementation. + * Hence, we need to do some basic cleanup here and re-enable + * advertising if necessary. + */ + hci_conn_del(conn); + if (type == LE_LINK) + hci_req_reenable_advertising(hdev); + } + hci_dev_unlock(hdev); } @@ -2474,6 +2517,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_complete *ev = (void *) skb->data; + struct inquiry_entry *ie; struct hci_conn *conn; BT_DBG("%s", hdev->name); @@ -2482,6 +2526,21 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { + /* Connection may not exist if auto-connected. Check the inquiry + * cache to see if we've already discovered this bdaddr before. + * If one is found and the link is an ACL type, create a + * connection automatically. + */ + ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); + if (ie && ev->link_type == ACL_LINK) { + conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, + HCI_ROLE_SLAVE); + if (!conn) { + bt_dev_err(hdev, "no memory for new conn"); + goto unlock; + } + } + + if (ev->link_type != SCO_LINK) goto unlock; @@ -2743,6 +2802,14 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_disconn_cfm(conn, ev->reason); hci_conn_del(conn); + /* The suspend notifier is waiting for all devices to disconnect, so + * clear the bit from pending tasks and inform the wait queue. + */ + if (list_empty(&hdev->conn_hash.list) && + test_and_clear_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } + /* Re-enable advertising if necessary, since it might * have been disabled by the connection. From the * HCI_LE_Set_Advertise_Enable command description in @@ -3302,6 +3369,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cc_read_inq_rsp_tx_power(hdev, skb); break; + case HCI_OP_READ_DEF_ERR_DATA_REPORTING: + hci_cc_read_def_err_data_reporting(hdev, skb); + break; + + case HCI_OP_WRITE_DEF_ERR_DATA_REPORTING: + hci_cc_write_def_err_data_reporting(hdev, skb); + break; + case HCI_OP_PIN_CODE_REPLY: hci_cc_pin_code_reply(hdev, skb); break; @@ -4557,6 +4632,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, goto confirm; } + /* If a link key already exists on the local host, leave the + * decision to user space since the remote device could be + * legitimate or malicious.
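 + * Setting confirm_hint here tells user space to ask the user for + * confirmation rather than accepting the pairing automatically.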
+ */ + if (hci_find_link_key(hdev, &ev->bdaddr)) { + bt_dev_dbg(hdev, "Local host already has link key"); + confirm_hint = 1; + goto confirm; + } + BT_DBG("Auto-accept of user confirmation with %ums delay", hdev->auto_accept_delay); @@ -5858,6 +5943,11 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) u8 status = 0, event = hdr->evt, req_evt = 0; u16 opcode = HCI_OP_NOP; + if (!event) { + bt_dev_warn(hdev, "Received unexpected HCI Event 00000000"); + goto done; + } + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) { struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; opcode = __le16_to_cpu(cmd_hdr->opcode); @@ -6069,6 +6159,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) req_complete_skb(hdev, status, opcode, orig_skb); } +done: kfree_skb(orig_skb); kfree_skb(skb); hdev->stat.evt_rx++; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 2a1b64dbf76e..649e1e5ed446 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -34,6 +34,9 @@ #define HCI_REQ_PEND 1 #define HCI_REQ_CANCELED 2 +#define LE_SUSPEND_SCAN_WINDOW 0x0012 +#define LE_SUSPEND_SCAN_INTERVAL 0x0060 + void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); @@ -654,6 +657,11 @@ void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_dev *hdev = req->hdev; + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return; + } + if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_enable cp; @@ -670,15 +678,55 @@ void hci_req_add_le_scan_disable(struct hci_request *req) } } -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) +static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr, + u8 bdaddr_type) +{ + struct hci_cp_le_del_from_white_list cp; + + cp.bdaddr_type = bdaddr_type; + bacpy(&cp.bdaddr, bdaddr); + + bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr, + cp.bdaddr_type); + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp); +} + +/* Adds connection to white list if needed. On error, returns -1. */ +static int add_to_white_list(struct hci_request *req, + struct hci_conn_params *params, u8 *num_entries, + bool allow_rpa) { struct hci_cp_le_add_to_white_list cp; + struct hci_dev *hdev = req->hdev; + + /* Already in white list */ + if (hci_bdaddr_list_lookup(&hdev->le_white_list, &params->addr, + params->addr_type)) + return 0; + + /* Select filter policy to accept all advertising */ + if (*num_entries >= hdev->le_white_list_size) + return -1; + + /* White list can not be used with RPAs */ + if (!allow_rpa && + hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) { + return -1; + } + /* During suspend, only wakeable devices can be in whitelist */ + if (hdev->suspended && !params->wakeable) + return 0; + + *num_entries += 1; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, &params->addr); + bt_dev_dbg(hdev, "Add %pMR (0x%x) to whitelist", &cp.bdaddr, + cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); + + return 0; } static u8 update_white_list(struct hci_request *req) @@ -686,7 +734,14 @@ struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; struct bdaddr_list *b; - uint8_t white_list_entries = 0; + u8 num_entries = 0; + bool pend_conn, pend_report; + /* We allow whitelisting even with RPAs in suspend.
In the worst case, + * we won't be able to wake from devices that use the privacy1.2 + * features. Additionally, once we support privacy1.2 and IRK + * offloading, we can update this to also check for those conditions. + */ + bool allow_rpa = hdev->suspended; /* Go through the current white list programmed into the * controller one by one and check if that address is still @@ -695,29 +750,28 @@ static u8 update_white_list(struct hci_request *req) * command to remove it from the controller. */ list_for_each_entry(b, &hdev->le_white_list, list) { - /* If the device is neither in pend_le_conns nor - * pend_le_reports then remove it from the whitelist. + pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, + b->bdaddr_type); + pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, + b->bdaddr_type); + + /* If the device is not likely to connect or report, + * remove it from the whitelist. */ - if (!hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) && - !hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - struct hci_cp_le_del_from_white_list cp; - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); + if (!pend_conn && !pend_report) { + del_from_white_list(req, &b->bdaddr, b->bdaddr_type); continue; } - if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { - /* White list can not be used with RPAs */ + /* White list can not be used with RPAs */ + if (!allow_rpa && + hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { return 0x00; } - white_list_entries++; + num_entries++; } /* Since all no longer valid white list entries have been @@ -731,47 +785,17 @@ static u8 update_white_list(struct hci_request *req) * white list. */ list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ + if (add_to_white_list(req, params, &num_entries, allow_rpa)) return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); } /* After adding all new pending connections, walk through * the list of pending reports and also add these to the - * white list if there is still space. + * white list if there is still space. Abort if space runs out. 
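 + * On abort the function returns 0x00, i.e. the caller falls back + * to the filter policy that accepts all advertising instead of + * relying on an incomplete white list.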
*/ list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ + if (add_to_white_list(req, params, &num_entries, allow_rpa)) return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); } /* Select filter policy to use white list */ @@ -866,6 +890,12 @@ void hci_req_add_le_passive_scan(struct hci_request *req) struct hci_dev *hdev = req->hdev; u8 own_addr_type; u8 filter_policy; + u8 window, interval; + + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return; + } /* Set require_privacy to false since no SCAN_REQ are send * during passive scanning. Not using an non-resolvable address @@ -896,8 +926,17 @@ void hci_req_add_le_passive_scan(struct hci_request *req) (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; - hci_req_start_scan(req, LE_SCAN_PASSIVE, hdev->le_scan_interval, - hdev->le_scan_window, own_addr_type, filter_policy); + if (hdev->suspended) { + window = LE_SUSPEND_SCAN_WINDOW; + interval = LE_SUSPEND_SCAN_INTERVAL; + } else { + window = hdev->le_scan_window; + interval = hdev->le_scan_interval; + } + + bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy); + hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window, + own_addr_type, filter_policy); } static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) @@ -918,6 +957,187 @@ static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) return adv_instance->scan_rsp_len; } +static void hci_req_clear_event_filter(struct hci_request *req) +{ + struct hci_cp_set_event_filter f; + + memset(&f, 0, sizeof(f)); + f.flt_type = HCI_FLT_CLEAR_ALL; + hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f); + + /* Update page scan state (since we may have modified it when setting + * the event filter). + */ + __hci_req_update_scan(req); +} + +static void hci_req_set_event_filter(struct hci_request *req) +{ + struct bdaddr_list *b; + struct hci_cp_set_event_filter f; + struct hci_dev *hdev = req->hdev; + u8 scan; + + /* Always clear event filter when starting */ + hci_req_clear_event_filter(req); + + list_for_each_entry(b, &hdev->wakeable, list) { + memset(&f, 0, sizeof(f)); + bacpy(&f.addr_conn_flt.bdaddr, &b->bdaddr); + f.flt_type = HCI_FLT_CONN_SETUP; + f.cond_type = HCI_CONN_SETUP_ALLOW_BDADDR; + f.addr_conn_flt.auto_accept = HCI_CONN_SETUP_AUTO_ON; + + bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr); + hci_req_add(req, HCI_OP_SET_EVENT_FLT, sizeof(f), &f); + } + + scan = !list_empty(&hdev->wakeable) ? 
SCAN_PAGE : SCAN_DISABLED; + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +static void hci_req_config_le_suspend_scan(struct hci_request *req) +{ + /* Can't change params without disabling first */ + hci_req_add_le_scan_disable(req); + + /* Configure params and enable scanning */ + hci_req_add_le_passive_scan(req); + + /* Block suspend notifier on response */ + set_bit(SUSPEND_SCAN_ENABLE, req->hdev->suspend_tasks); +} + +static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, + status); + if (test_and_clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || + test_and_clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + wake_up(&hdev->suspend_wait_q); + } +} + +/* Call with hci_dev_lock */ +void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) +{ + int old_state; + struct hci_conn *conn; + struct hci_request req; + u8 page_scan; + int disconnect_counter; + + if (next == hdev->suspend_state) { + bt_dev_dbg(hdev, "Same state before and after: %d", next); + goto done; + } + + hdev->suspend_state = next; + hci_req_init(&req, hdev); + + if (next == BT_SUSPEND_DISCONNECT) { + /* Mark device as suspended */ + hdev->suspended = true; + + /* Pause discovery if not already stopped */ + old_state = hdev->discovery.state; + if (old_state != DISCOVERY_STOPPED) { + set_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + + hdev->discovery_paused = true; + hdev->discovery_old_state = old_state; + + /* Stop advertising */ + old_state = hci_dev_test_flag(hdev, HCI_ADVERTISING); + if (old_state) { + set_bit(SUSPEND_PAUSE_ADVERTISING, hdev->suspend_tasks); + cancel_delayed_work(&hdev->discov_off); + queue_delayed_work(hdev->req_workqueue, + &hdev->discov_off, 0); + } + + hdev->advertising_paused = true; + hdev->advertising_old_state = old_state; + /* Disable page scan */ + page_scan = SCAN_DISABLED; + hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); + + /* Disable LE passive scan */ + hci_req_add_le_scan_disable(&req); + + /* Mark task needing completion */ + set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + + /* Prevent disconnects from causing scanning to be re-enabled */ + hdev->scanning_paused = true; + + /* Run commands before disconnecting */ + hci_req_run(&req, suspend_req_complete); + + disconnect_counter = 0; + /* Soft disconnect everything (power off) */ + list_for_each_entry(conn, &hdev->conn_hash.list, list) { + hci_disconnect(conn, HCI_ERROR_REMOTE_POWER_OFF); + disconnect_counter++; + } + + if (disconnect_counter > 0) { + bt_dev_dbg(hdev, + "Had %d disconnects. Will wait on them", + disconnect_counter); + set_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks); + } + } else if (next == BT_SUSPEND_COMPLETE) { + /* Unpause to take care of updating scanning params */ + hdev->scanning_paused = false; + /* Enable event filter for paired devices */ + hci_req_set_event_filter(&req); + /* Enable passive scan at lower duty cycle */ + hci_req_config_le_suspend_scan(&req); + /* Pause scan changes again. 
*/ + hdev->scanning_paused = true; + hci_req_run(&req, suspend_req_complete); + } else { + hdev->suspended = false; + hdev->scanning_paused = false; + + hci_req_clear_event_filter(&req); + /* Reset passive/background scanning to normal */ + hci_req_config_le_suspend_scan(&req); + + /* Unpause advertising */ + hdev->advertising_paused = false; + if (hdev->advertising_old_state) { + set_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks); + hci_dev_set_flag(hdev, HCI_ADVERTISING); + queue_work(hdev->req_workqueue, + &hdev->discoverable_update); + hdev->advertising_old_state = 0; + } + + /* Unpause discovery */ + hdev->discovery_paused = false; + if (hdev->discovery_old_state != DISCOVERY_STOPPED && + hdev->discovery_old_state != DISCOVERY_STOPPING) { + set_bit(SUSPEND_UNPAUSE_DISCOVERY, hdev->suspend_tasks); + hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + } + + hci_req_run(&req, suspend_req_complete); + } + + hdev->suspend_state = next; + +done: + clear_bit(SUSPEND_PREPARE_NOTIFIER, hdev->suspend_tasks); + wake_up(&hdev->suspend_wait_q); +} + static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) { u8 instance = hdev->cur_adv_instance; @@ -1499,7 +1719,7 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); + bt_dev_err(hdev, "failed to generate new RPA"); return err; } @@ -2015,6 +2235,9 @@ void __hci_req_update_scan(struct hci_request *req) if (mgmt_powering_down(hdev)) return; + if (hdev->scanning_paused) + return; + if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || disconnected_whitelist_entries(hdev)) scan = SCAN_PAGE; @@ -2504,23 +2727,6 @@ static int active_scan(struct hci_request *req, unsigned long opt) BT_DBG("%s", hdev->name); - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - hci_dev_lock(hdev); - - /* Don't let discovery abort an outgoing connection attempt - * that's using directed advertising. - */ - if (hci_lookup_le_connect(hdev)) { - hci_dev_unlock(hdev); - return -EBUSY; - } - - cancel_adv_timeout(hdev); - hci_dev_unlock(hdev); - - __hci_req_disable_advertising(req); - } - /* If controller is scanning, it means the background scanning is * running. Thus, we should temporarily stop it in order to set the * discovery scanning parameters. diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index a7019fbeadd3..0e81614d235e 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -68,6 +68,8 @@ void __hci_req_update_eir(struct hci_request *req); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); + void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index bef84b95e2c4..3b4fa27a44e6 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1279,7 +1279,7 @@ static int hidp_session_thread(void *arg) add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait); /* This memory barrier is paired with wq_has_sleeper(). See * sock_poll_wait() for more information why this is needed. 
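 * Since the barrier immediately precedes an atomic RMW operation * (the atomic_inc() below), the weaker smp_mb__before_atomic() is * sufficient.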
*/ - smp_mb(); + smp_mb__before_atomic(); /* notify synchronous startup that we're ready */ atomic_inc(&session->state); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 195459a1e53e..8b0fca39989d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -45,6 +45,7 @@ #define LE_FLOWCTL_MAX_CREDITS 65535 bool disable_ertm; +bool enable_ecred; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD; @@ -419,6 +420,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); mutex_lock(&conn->chan_lock); + /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling + * this work. No need to call l2cap_chan_hold(chan) here again. + */ l2cap_chan_lock(chan); if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG) @@ -431,12 +435,12 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_close(chan, reason); - l2cap_chan_unlock(chan); - chan->ops->close(chan); - mutex_unlock(&conn->chan_lock); + l2cap_chan_unlock(chan); l2cap_chan_put(chan); + + mutex_unlock(&conn->chan_lock); } struct l2cap_chan *l2cap_chan_create(void) @@ -532,6 +536,17 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) skb_queue_head_init(&chan->tx_q); } +static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits) +{ + l2cap_le_flowctl_init(chan, tx_credits); + + /* L2CAP implementations shall support a minimum MPS of 64 octets */ + if (chan->mps < L2CAP_ECRED_MIN_MPS) { + chan->mps = L2CAP_ECRED_MIN_MPS; + chan->rx_credits = (chan->imtu / chan->mps) + 1; + } +} + void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, @@ -638,6 +653,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) break; case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: skb_queue_purge(&chan->tx_q); break; @@ -704,6 +720,27 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) &rsp); } +static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct l2cap_ecred_conn_rsp rsp; + u16 result; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) + result = L2CAP_CR_LE_AUTHORIZATION; + else + result = L2CAP_CR_LE_BAD_PSM; + + l2cap_state_change(chan, BT_DISCONN); + + memset(&rsp, 0, sizeof(rsp)); + + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_CONN_RSP, sizeof(rsp), + &rsp); +} + static void l2cap_chan_connect_reject(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -749,8 +786,16 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED) { if (conn->hcon->type == ACL_LINK) l2cap_chan_connect_reject(chan); - else if (conn->hcon->type == LE_LINK) - l2cap_chan_le_connect_reject(chan); + else if (conn->hcon->type == LE_LINK) { + switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + l2cap_chan_le_connect_reject(chan); + break; + case L2CAP_MODE_EXT_FLOWCTL: + l2cap_chan_ecred_connect_reject(chan); + break; + } + } } l2cap_chan_del(chan, reason); @@ -1273,8 +1318,13 @@ static void l2cap_chan_ready(struct l2cap_chan *chan) chan->conf_state = 0; __clear_chan_timer(chan); - if (chan->mode == L2CAP_MODE_LE_FLOWCTL && !chan->tx_credits) - chan->ops->suspend(chan); + switch (chan->mode) { + case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: + if (!chan->tx_credits) + chan->ops->suspend(chan); + break; + } chan->state =
BT_CONNECTED; @@ -1306,6 +1356,31 @@ static void l2cap_le_connect(struct l2cap_chan *chan) sizeof(req), &req); } +static void l2cap_ecred_connect(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct { + struct l2cap_ecred_conn_req req; + __le16 scid; + } __packed pdu; + + if (test_and_set_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + l2cap_ecred_init(chan, 0); + + pdu.req.psm = chan->psm; + pdu.req.mtu = cpu_to_le16(chan->imtu); + pdu.req.mps = cpu_to_le16(chan->mps); + pdu.req.credits = cpu_to_le16(chan->rx_credits); + pdu.scid = cpu_to_le16(chan->scid); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_CONN_REQ, + sizeof(pdu), &pdu); +} + static void l2cap_le_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -1318,8 +1393,12 @@ static void l2cap_le_start(struct l2cap_chan *chan) return; } - if (chan->state == BT_CONNECT) - l2cap_le_connect(chan); + if (chan->state == BT_CONNECT) { + if (chan->mode == L2CAP_MODE_EXT_FLOWCTL) + l2cap_ecred_connect(chan); + else + l2cap_le_connect(chan); + } } static void l2cap_start_connection(struct l2cap_chan *chan) @@ -1737,9 +1816,9 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_del(chan, err); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); } @@ -2505,6 +2584,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: + case L2CAP_MODE_EXT_FLOWCTL: /* Check outgoing MTU */ if (len > chan->omtu) return -EMSGSIZE; @@ -3773,6 +3853,45 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) &rsp); } +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +{ + struct { + struct l2cap_ecred_conn_rsp rsp; + __le16 dcid[5]; + } __packed pdu; + struct l2cap_conn *conn = chan->conn; + u16 ident = chan->ident; + int i = 0; + + if (!ident) + return; + + BT_DBG("chan %p ident %d", chan, ident); + + pdu.rsp.mtu = cpu_to_le16(chan->imtu); + pdu.rsp.mps = cpu_to_le16(chan->mps); + pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); + + mutex_lock(&conn->chan_lock); + + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident != ident) + continue; + + /* Reset ident so only one response is sent */ + chan->ident = 0; + + /* Include all channels pending with the same ident */ + pdu.dcid[i++] = cpu_to_le16(chan->scid); + } + + mutex_unlock(&conn->chan_lock); + + l2cap_send_cmd(conn, ident, L2CAP_ECRED_CONN_RSP, + sizeof(pdu.rsp) + i * sizeof(__le16), &pdu); +} + void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) { struct l2cap_conn_rsp rsp; @@ -4181,7 +4300,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, return 0; } - if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2) { + if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2 && + chan->state != BT_CONNECTED) { cmd_reject_invalid_cid(conn, cmd->ident, chan->scid, chan->dcid); goto unlock; @@ -4405,6 +4525,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, return 0; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); rsp.dcid = cpu_to_le16(chan->scid); @@ -4413,12 +4534,11 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); - l2cap_chan_hold(chan); l2cap_chan_del(chan, ECONNRESET); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); 
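 + + /* Note the ordering used here and throughout this patch: the channel + * is held before it is locked, ops->close() runs while the reference + * is still held, and l2cap_chan_put() comes only after + * l2cap_chan_unlock(), so the final put can never free a locked + * channel. + */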
mutex_unlock(&conn->chan_lock); @@ -4450,20 +4570,21 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } + l2cap_chan_hold(chan); l2cap_chan_lock(chan); if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); + l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); return 0; } - l2cap_chan_hold(chan); l2cap_chan_del(chan, 0); - l2cap_chan_unlock(chan); - chan->ops->close(chan); + + l2cap_chan_unlock(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -5714,6 +5835,356 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, return 0; } +static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_conn_req *req = (void *) data; + struct { + struct l2cap_ecred_conn_rsp rsp; + __le16 dcid[5]; + } __packed pdu; + struct l2cap_chan *chan, *pchan; + u16 mtu, mps; + __le16 psm; + u8 result, len = 0; + int i, num_scid; + bool defer = false; + + if (!enable_ecred) + return -EINVAL; + + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto response; + } + + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + + if (mtu < L2CAP_ECRED_MIN_MTU || mps < L2CAP_ECRED_MIN_MPS) { + result = L2CAP_CR_LE_UNACCEPT_PARAMS; + goto response; + } + + psm = req->psm; + + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); + + memset(&pdu, 0, sizeof(pdu)); + + /* Check if we have socket listening on psm */ + pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, + &conn->hcon->dst, LE_LINK); + if (!pchan) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + + mutex_lock(&conn->chan_lock); + l2cap_chan_lock(pchan); + + if (!smp_sufficient_security(conn->hcon, pchan->sec_level, + SMP_ALLOW_STK)) { + result = L2CAP_CR_LE_AUTHENTICATION; + goto unlock; + } + + result = L2CAP_CR_LE_SUCCESS; + cmd_len -= sizeof(req); + num_scid = cmd_len / sizeof(u16); + + for (i = 0; i < num_scid; i++) { + u16 scid = __le16_to_cpu(req->scid[i]); + + BT_DBG("scid[%d] 0x%4.4x", i, scid); + + pdu.dcid[i] = 0x0000; + len += sizeof(*pdu.dcid); + + /* Check for valid dynamic CID range */ + if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { + result = L2CAP_CR_LE_INVALID_SCID; + continue; + } + + /* Check if we already have channel with that dcid */ + if (__l2cap_get_chan_by_dcid(conn, scid)) { + result = L2CAP_CR_LE_SCID_IN_USE; + continue; + } + + chan = pchan->ops->new_connection(pchan); + if (!chan) { + result = L2CAP_CR_LE_NO_MEM; + continue; + } + + bacpy(&chan->src, &conn->hcon->src); + bacpy(&chan->dst, &conn->hcon->dst); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); + chan->psm = psm; + chan->dcid = scid; + chan->omtu = mtu; + chan->remote_mps = mps; + + __l2cap_chan_add(conn, chan); + + l2cap_ecred_init(chan, __le16_to_cpu(req->credits)); + + /* Init response */ + if (!pdu.rsp.credits) { + pdu.rsp.mtu = cpu_to_le16(chan->imtu); + pdu.rsp.mps = cpu_to_le16(chan->mps); + pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + } + + pdu.dcid[i] = cpu_to_le16(chan->scid); + + __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); + + chan->ident = cmd->ident; + + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { + l2cap_state_change(chan, BT_CONNECT2); + defer = true; + chan->ops->defer(chan); + } else { + l2cap_chan_ready(chan); + } + } + +unlock: + l2cap_chan_unlock(pchan); + mutex_unlock(&conn->chan_lock); + l2cap_chan_put(pchan); + +response: + 
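l2cap_ecred_conn_req() above handles one signaling PDU that can open up to five channels at once: a fixed header followed by a variable list of 16-bit source CIDs, answered with a matching dcid[] array in which 0x0000 marks a refused slot. A compile-clean sketch of the layout and the length check; note that as flowed above the check reads cmd_len - sizeof(*req) % sizeof(u16), which C precedence would reduce to just cmd_len, so the parenthesized form below is assumed to be the intended one:

    #include <stdint.h>
    #include <stddef.h>

    struct ecred_conn_req {
            uint16_t psm;
            uint16_t mtu;
            uint16_t mps;
            uint16_t credits;
            uint16_t scid[];        /* 1..5 source CIDs follow */
    } __attribute__((packed));

    /* A valid length is the fixed header plus a whole number of SCIDs;
     * the parentheses around the subtraction matter, since '%' binds
     * tighter than '-' in C.
     */
    static int ecred_req_len_ok(size_t cmd_len)
    {
            if (cmd_len < sizeof(struct ecred_conn_req))
                    return 0;
            return (cmd_len - sizeof(struct ecred_conn_req)) %
                   sizeof(uint16_t) == 0;
    }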
pdu.rsp.result = cpu_to_le16(result); + + if (defer) + return 0; + + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, + sizeof(pdu.rsp) + len, &pdu); + + return 0; +} + +static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_conn_rsp *rsp = (void *) data; + struct hci_conn *hcon = conn->hcon; + u16 mtu, mps, credits, result; + struct l2cap_chan *chan; + int err = 0, sec_level; + int i = 0; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + mtu = __le16_to_cpu(rsp->mtu); + mps = __le16_to_cpu(rsp->mps); + credits = __le16_to_cpu(rsp->credits); + result = __le16_to_cpu(rsp->result); + + BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits, + result); + + mutex_lock(&conn->chan_lock); + + cmd_len -= sizeof(*rsp); + + list_for_each_entry(chan, &conn->chan_l, list) { + u16 dcid; + + if (chan->ident != cmd->ident || + chan->mode != L2CAP_MODE_EXT_FLOWCTL || + chan->state == BT_CONNECTED) + continue; + + l2cap_chan_lock(chan); + + /* Check that there is a dcid for each pending channel */ + if (cmd_len < sizeof(dcid)) { + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + continue; + } + + dcid = __le16_to_cpu(rsp->dcid[i++]); + cmd_len -= sizeof(u16); + + BT_DBG("dcid[%d] 0x%4.4x", i, dcid); + + /* Check if dcid is already in use */ + if (dcid && __l2cap_get_chan_by_dcid(conn, dcid)) { + /* If a device receives a + * L2CAP_CREDIT_BASED_CONNECTION_RSP packet with an + * already-assigned Destination CID, then both the + * original channel and the new channel shall be + * immediately discarded and not used. + */ + l2cap_chan_del(chan, ECONNREFUSED); + l2cap_chan_unlock(chan); + chan = __l2cap_get_chan_by_dcid(conn, dcid); + l2cap_chan_lock(chan); + l2cap_chan_del(chan, ECONNRESET); + l2cap_chan_unlock(chan); + continue; + } + + switch (result) { + case L2CAP_CR_LE_AUTHENTICATION: + case L2CAP_CR_LE_ENCRYPTION: + /* If we already have MITM protection we can't do + * anything. 
+ */ + if (hcon->sec_level > BT_SECURITY_MEDIUM) { + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + sec_level = hcon->sec_level + 1; + if (chan->sec_level < sec_level) + chan->sec_level = sec_level; + + /* We'll need to send a new Connect Request */ + clear_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags); + + smp_conn_security(hcon, chan->sec_level); + break; + + case L2CAP_CR_LE_BAD_PSM: + l2cap_chan_del(chan, ECONNREFUSED); + break; + + default: + /* If dcid was not set it means channels was refused */ + if (!dcid) { + l2cap_chan_del(chan, ECONNREFUSED); + break; + } + + chan->ident = 0; + chan->dcid = dcid; + chan->omtu = mtu; + chan->remote_mps = mps; + chan->tx_credits = credits; + l2cap_chan_ready(chan); + break; + } + + l2cap_chan_unlock(chan); + } + + mutex_unlock(&conn->chan_lock); + + return err; +} + +static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_ecred_reconf_req *req = (void *) data; + struct l2cap_ecred_reconf_rsp rsp; + u16 mtu, mps, result; + struct l2cap_chan *chan; + int i, num_scid; + + if (!enable_ecred) + return -EINVAL; + + if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { + result = L2CAP_CR_LE_INVALID_PARAMS; + goto respond; + } + + mtu = __le16_to_cpu(req->mtu); + mps = __le16_to_cpu(req->mps); + + BT_DBG("mtu %u mps %u", mtu, mps); + + if (mtu < L2CAP_ECRED_MIN_MTU) { + result = L2CAP_RECONF_INVALID_MTU; + goto respond; + } + + if (mps < L2CAP_ECRED_MIN_MPS) { + result = L2CAP_RECONF_INVALID_MPS; + goto respond; + } + + cmd_len -= sizeof(*req); + num_scid = cmd_len / sizeof(u16); + result = L2CAP_RECONF_SUCCESS; + + for (i = 0; i < num_scid; i++) { + u16 scid; + + scid = __le16_to_cpu(req->scid[i]); + if (!scid) + return -EPROTO; + + chan = __l2cap_get_chan_by_dcid(conn, scid); + if (!chan) + continue; + + /* If the MTU value is decreased for any of the included + * channels, then the receiver shall disconnect all + * included channels. 
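The reconfigure handler above encodes the asymmetry in the spec text quoted in its comment: MPS may change in either direction, but a request that lowers the MTU of any included channel is answered with L2CAP_RECONF_INVALID_MTU (the per-channel check follows just below). A standalone sketch of that acceptance rule; the helper name is invented and the minimums mirror L2CAP_ECRED_MIN_MTU/MPS from the diff:

    #include <stdint.h>

    #define ECRED_MIN_MTU 64        /* L2CAP_ECRED_MIN_MTU in the diff */
    #define ECRED_MIN_MPS 64        /* L2CAP_ECRED_MIN_MPS in the diff */

    /* Nonzero when a reconfigure to (new_mtu, new_mps) is acceptable
     * for a channel whose current outgoing MTU is cur_mtu.
     */
    static int ecred_reconf_acceptable(uint16_t cur_mtu, uint16_t new_mtu,
                                       uint16_t new_mps)
    {
            if (new_mtu < ECRED_MIN_MTU || new_mps < ECRED_MIN_MPS)
                    return 0;
            /* MPS may move either way; MTU may only grow. */
            return new_mtu >= cur_mtu;
    }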
+ */ + if (chan->omtu > mtu) { + BT_ERR("chan %p decreased MTU %u -> %u", chan, + chan->omtu, mtu); + result = L2CAP_RECONF_INVALID_MTU; + } + + chan->omtu = mtu; + chan->remote_mps = mps; + } + +respond: + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_RECONF_RSP, sizeof(rsp), + &rsp); + + return 0; +} + +static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + u8 *data) +{ + struct l2cap_chan *chan; + struct l2cap_ecred_conn_rsp *rsp = (void *) data; + u16 result; + + if (cmd_len < sizeof(*rsp)) + return -EPROTO; + + result = __le16_to_cpu(rsp->result); + + BT_DBG("result 0x%4.4x", rsp->result); + + if (!result) + return 0; + + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident != cmd->ident) + continue; + + l2cap_chan_del(chan, ECONNRESET); + } + + return 0; +} + static inline int l2cap_le_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) @@ -5769,6 +6240,22 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, err = l2cap_le_credits(conn, cmd, cmd_len, data); break; + case L2CAP_ECRED_CONN_REQ: + err = l2cap_ecred_conn_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_CONN_RSP: + err = l2cap_ecred_conn_rsp(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_RECONF_REQ: + err = l2cap_ecred_reconf_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_ECRED_RECONF_RSP: + err = l2cap_ecred_reconf_rsp(conn, cmd, cmd_len, data); + break; + case L2CAP_DISCONN_REQ: err = l2cap_disconnect_req(conn, cmd, cmd_len, data); break; @@ -5831,9 +6318,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { struct hci_conn *hcon = conn->hcon; - u8 *data = skb->data; - int len = skb->len; - struct l2cap_cmd_hdr cmd; + struct l2cap_cmd_hdr *cmd; int err; l2cap_raw_recv(conn, skb); @@ -5841,35 +6326,34 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (hcon->type != ACL_LINK) goto drop; - while (len >= L2CAP_CMD_HDR_SIZE) { - u16 cmd_len; - memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); - data += L2CAP_CMD_HDR_SIZE; - len -= L2CAP_CMD_HDR_SIZE; + while (skb->len >= L2CAP_CMD_HDR_SIZE) { + u16 len; + + cmd = (void *) skb->data; + skb_pull(skb, L2CAP_CMD_HDR_SIZE); - cmd_len = le16_to_cpu(cmd.len); + len = le16_to_cpu(cmd->len); - BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, - cmd.ident); + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd->code, len, + cmd->ident); - if (cmd_len > len || !cmd.ident) { + if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); break; } - err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); + err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { struct l2cap_cmd_rej_unk rej; BT_ERR("Wrong link type (%d)", err); rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } - data += cmd_len; - len -= cmd_len; + skb_pull(skb, len); } drop: @@ -6814,11 +7298,13 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) struct l2cap_le_credits pkt; u16 return_credits; - return_credits = ((chan->imtu / chan->mps) + 1) - chan->rx_credits; + return_credits = (chan->imtu / chan->mps) + 1; - if (!return_credits) + if (chan->rx_credits >= return_credits) return; + return_credits -= chan->rx_credits; + BT_DBG("chan %p returning %u credits to sender", chan, return_credits); chan->rx_credits += return_credits; @@ -6831,7 
+7317,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); } -static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb) +static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6846,7 +7332,7 @@ static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb) return err; } -static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) +static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6894,7 +7380,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) } if (skb->len == sdu_len) - return l2cap_le_recv(chan, skb); + return l2cap_ecred_recv(chan, skb); chan->sdu = skb; chan->sdu_len = sdu_len; @@ -6926,7 +7412,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) skb = NULL; if (chan->sdu->len == chan->sdu_len) { - err = l2cap_le_recv(chan, chan->sdu); + err = l2cap_ecred_recv(chan, chan->sdu); if (!err) { chan->sdu = NULL; chan->sdu_last_frag = NULL; @@ -6987,7 +7473,8 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, switch (chan->mode) { case L2CAP_MODE_LE_FLOWCTL: - if (l2cap_le_data_rcv(chan, skb) < 0) + case L2CAP_MODE_EXT_FLOWCTL: + if (l2cap_ecred_data_rcv(chan, skb) < 0) goto drop; goto done; @@ -7214,8 +7701,8 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, struct hci_dev *hdev; int err; - BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst, - dst_type, __le16_to_cpu(psm)); + BT_DBG("%pMR -> %pMR (type %u) psm 0x%4.4x mode 0x%2.2x", &chan->src, + dst, dst_type, __le16_to_cpu(psm), chan->mode); hdev = hci_get_route(dst, &chan->src, chan->src_type); if (!hdev) @@ -7244,6 +7731,12 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, break; case L2CAP_MODE_LE_FLOWCTL: break; + case L2CAP_MODE_EXT_FLOWCTL: + if (!enable_ecred) { + err = -EOPNOTSUPP; + goto done; + } + break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: if (!disable_ertm) @@ -7368,6 +7861,38 @@ done: } EXPORT_SYMBOL_GPL(l2cap_chan_connect); +static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) +{ + struct l2cap_conn *conn = chan->conn; + struct { + struct l2cap_ecred_reconf_req req; + __le16 scid; + } pdu; + + pdu.req.mtu = cpu_to_le16(chan->imtu); + pdu.req.mps = cpu_to_le16(chan->mps); + pdu.scid = cpu_to_le16(chan->scid); + + chan->ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, + sizeof(pdu), &pdu); +} + +int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu) +{ + if (chan->imtu > mtu) + return -EINVAL; + + BT_DBG("chan %p mtu 0x%4.4x", chan, mtu); + + chan->imtu = mtu; + + l2cap_ecred_reconfigure(chan); + + return 0; +} + /* ---- L2CAP interface with lower layer (HCI) ---- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) @@ -7579,7 +8104,8 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } else if (chan->state == BT_CONNECT2 && - chan->mode != L2CAP_MODE_LE_FLOWCTL) { + !(chan->mode == L2CAP_MODE_EXT_FLOWCTL || + chan->mode == L2CAP_MODE_LE_FLOWCTL)) { struct l2cap_conn_rsp rsp; __u16 res, stat; @@ -7787,3 +8313,6 @@ void l2cap_exit(void) module_param(disable_ertm, bool, 0644); MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); + +module_param(enable_ecred, bool, 0644); +MODULE_PARM_DESC(enable_ecred, "Enable enhanced credit 
flow control mode"); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a7be8b59b3c2..40fb10b591bd 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -232,7 +232,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - if (chan->psm && bdaddr_type_is_le(chan->src_type)) + if (chan->psm && bdaddr_type_is_le(chan->src_type) && !chan->mode) chan->mode = L2CAP_MODE_LE_FLOWCTL; err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), @@ -274,6 +274,12 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) case L2CAP_MODE_BASIC: case L2CAP_MODE_LE_FLOWCTL: break; + case L2CAP_MODE_EXT_FLOWCTL: + if (!enable_ecred) { + err = -EOPNOTSUPP; + goto done; + } + break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: if (!disable_ertm) @@ -427,6 +433,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; + BT_DBG("mode 0x%2.2x", chan->mode); + len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -499,6 +507,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct bt_security sec; struct bt_power pwr; + u32 phys; int len, err = 0; BT_DBG("sk %p", sk); @@ -603,6 +612,18 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, err = -EFAULT; break; + case BT_PHY: + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + phys = hci_conn_get_phy(chan->conn->hcon); + + if (put_user(phys, (u32 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; @@ -694,6 +715,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; } + BT_DBG("mode 0x%2.2x", chan->mode); + chan->imtu = opts.imtu; chan->omtu = opts.omtu; chan->fcs = opts.fcs; @@ -926,7 +949,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (sk->sk_state == BT_CONNECTED) { + if (chan->mode == L2CAP_MODE_LE_FLOWCTL && + sk->sk_state == BT_CONNECTED) { err = -EISCONN; break; } @@ -936,7 +960,12 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - chan->imtu = opt; + if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && + sk->sk_state == BT_CONNECTED) + err = l2cap_chan_reconfigure(chan, opt); + else + chan->imtu = opt; + break; default: @@ -991,7 +1020,11 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (sk->sk_state == BT_CONNECT2 && test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - if (bdaddr_type_is_le(pi->chan->src_type)) { + if (pi->chan->mode == L2CAP_MODE_EXT_FLOWCTL) { + sk->sk_state = BT_CONNECTED; + pi->chan->state = BT_CONNECTED; + __l2cap_ecred_conn_rsp_defer(pi->chan); + } else if (bdaddr_type_is_le(pi->chan->src_type)) { sk->sk_state = BT_CONNECTED; pi->chan->state = BT_CONNECTED; __l2cap_le_connect_rsp_defer(pi->chan); @@ -1042,7 +1075,7 @@ done: } /* Kill socket (only if zapped and orphan) - * Must be called on unlocked socket. + * Must be called on unlocked socket, with l2cap channel lock. 
*/ static void l2cap_sock_kill(struct sock *sk) { @@ -1193,6 +1226,7 @@ static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; int err; + struct l2cap_chan *chan; BT_DBG("sock %p, sk %p", sock, sk); @@ -1202,9 +1236,17 @@ static int l2cap_sock_release(struct socket *sock) bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, 2); + chan = l2cap_pi(sk)->chan; + + l2cap_chan_hold(chan); + l2cap_chan_lock(chan); sock_orphan(sk); l2cap_sock_kill(sk); + + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); + return err; } @@ -1222,12 +1264,15 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) BT_DBG("child chan %p state %s", chan, state_to_string(chan->state)); + l2cap_chan_hold(chan); l2cap_chan_lock(chan); + __clear_chan_timer(chan); l2cap_chan_close(chan, ECONNRESET); - l2cap_chan_unlock(chan); - l2cap_sock_kill(sk); + + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3074363c68df..6552003a170e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 15 +#define MGMT_REVISION 16 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -107,6 +107,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_EXT_INFO, MGMT_OP_SET_APPEARANCE, MGMT_OP_SET_BLOCKED_KEYS, + MGMT_OP_SET_WIDEBAND_SPEECH, }; static const u16 mgmt_events[] = { @@ -762,6 +763,10 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; + + if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, + &hdev->quirks)) + settings |= MGMT_SETTING_WIDEBAND_SPEECH; } if (lmp_le_capable(hdev)) { @@ -846,6 +851,9 @@ static u32 get_current_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_STATIC_ADDRESS; } + if (hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED)) + settings |= MGMT_SETTING_WIDEBAND_SPEECH; + return settings; } @@ -1382,6 +1390,12 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } + if (hdev->advertising_paused) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_BUSY); + goto failed; + } + if (!hdev_is_powered(hdev)) { bool changed = false; @@ -3589,6 +3603,62 @@ static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data, err, NULL, 0); } +static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_mode *cp = data; + int err; + bool changed = false; + + BT_DBG("request for %s", hdev->name); + + if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks)) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_NOT_SUPPORTED); + + if (cp->val != 0x00 && cp->val != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_BUSY); + goto unlock; + } + + if (hdev_is_powered(hdev) && + !!cp->val != hci_dev_test_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED)) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_WIDEBAND_SPEECH, + MGMT_STATUS_REJECTED); + goto unlock; + } + + if (cp->val) + changed = !hci_dev_test_and_set_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + else + changed = hci_dev_test_and_clear_flag(hdev, + HCI_WIDEBAND_SPEECH_ENABLED); + + err = 
send_settings_rsp(sk, MGMT_OP_SET_WIDEBAND_SPEECH, hdev); + if (err < 0) + goto unlock; + + if (changed) + err = new_settings(hdev, sk); + +unlock: + hci_dev_unlock(hdev); + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -3865,6 +3935,13 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); + + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_UNPAUSE_DISCOVERY, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, @@ -3926,6 +4003,13 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, goto failed; } + /* Can't start discovery when it is paused */ + if (hdev->discovery_paused) { + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, + &cp->type, sizeof(cp->type)); + goto failed; + } + /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ @@ -4093,6 +4177,12 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) } hci_dev_unlock(hdev); + + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_DISCOVERY, hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused discovery"); + wake_up(&hdev->suspend_wait_q); + } } static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, @@ -4324,6 +4414,17 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, if (match.sk) sock_put(match.sk); + /* Handle suspend notifier */ + if (test_and_clear_bit(SUSPEND_PAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Paused advertising"); + wake_up(&hdev->suspend_wait_q); + } else if (test_and_clear_bit(SUSPEND_UNPAUSE_ADVERTISING, + hdev->suspend_tasks)) { + bt_dev_dbg(hdev, "Unpaused advertising"); + wake_up(&hdev->suspend_wait_q); + } + /* If "Set Advertising" was just disabled and instance advertising was * set up earlier, then re-enable multi-instance advertising. 
*/ @@ -4375,6 +4476,10 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); + if (hdev->advertising_paused) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_BUSY); + hci_dev_lock(hdev); val = !!cp->val; @@ -6743,8 +6848,11 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, if (!err) err = hci_req_run(&req, add_advertising_complete); - if (err < 0) + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_FAILED); mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -6990,6 +7098,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { set_phy_configuration, MGMT_SET_PHY_CONFIGURATION_SIZE }, { set_blocked_keys, MGMT_OP_SET_BLOCKED_KEYS_SIZE, HCI_MGMT_VAR_LEN }, + { set_wideband_speech, MGMT_SETTING_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 3a9e9d9670be..2e20af317cea 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -40,7 +40,6 @@ static bool disable_cfc; static bool l2cap_ertm; static int channel_mtu = -1; -static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; static struct task_struct *rfcomm_thread; @@ -73,8 +72,6 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); /* ---- RFCOMM frame parsing macros ---- */ #define __get_dlci(b) ((b & 0xfc) >> 2) -#define __get_channel(b) ((b & 0xf8) >> 3) -#define __get_dir(b) ((b & 0x04) >> 2) #define __get_type(b) ((b & 0xef)) #define __test_ea(b) ((b & 0x01)) @@ -87,7 +84,6 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s); #define __ctrl(type, pf) (((type & 0xef) | (pf << 4))) #define __dlci(dir, chn) (((chn & 0x1f) << 1) | dir) #define __srv_channel(dlci) (dlci >> 1) -#define __dir(dlci) (dlci & 0x01) #define __len8(len) (((len) << 1) | 1) #define __len16(len) ((len) << 1) @@ -752,7 +748,8 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->chan->imtu = l2cap_mtu; + /* Set MTU to 0 so L2CAP can auto select the MTU */ + l2cap_pi(sk)->chan->imtu = 0; l2cap_pi(sk)->chan->sec_level = sec_level; if (l2cap_ertm) l2cap_pi(sk)->chan->mode = L2CAP_MODE_ERTM; @@ -2039,7 +2036,8 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Set L2CAP options */ sk = sock->sk; lock_sock(sk); - l2cap_pi(sk)->chan->imtu = l2cap_mtu; + /* Set MTU to 0 so L2CAP can auto select the MTU */ + l2cap_pi(sk)->chan->imtu = 0; release_sock(sk); /* Start listening on the socket */ @@ -2237,9 +2235,6 @@ MODULE_PARM_DESC(disable_cfc, "Disable credit based flow control"); module_param(channel_mtu, int, 0644); MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel"); -module_param(l2cap_mtu, uint, 0644); -MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); - module_param(l2cap_ertm, bool, 0644); MODULE_PARM_DESC(l2cap_ertm, "Use L2CAP ERTM mode for connection"); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 0c7d31c6c18c..a58584949a95 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -413,10 +413,8 @@ static int __rfcomm_create_dev(struct sock *sk, void __user *arg) dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel); if (IS_ERR(dlc)) return PTR_ERR(dlc); - else if (dlc) { - rfcomm_dlc_put(dlc); + if (dlc) return -EBUSY; - } dlc = 
rfcomm_dlc_alloc(GFP_KERNEL); if (!dlc) return -ENOMEM; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b91d6b440fdf..c8c3d38cdc7b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -922,6 +922,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; int len, err = 0; struct bt_voice voice; + u32 phys; BT_DBG("sk %p", sk); @@ -956,6 +957,18 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, break; + case BT_PHY: + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + break; + } + + phys = hci_conn_get_phy(sco_pi(sk)->conn->hcon); + + if (put_user(phys, (u32 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 204f14f8b507..1476a91ce935 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1145,7 +1145,7 @@ static void sc_generate_link_key(struct smp_chan *smp) return; if (test_bit(SMP_FLAG_CT2, &smp->flags)) { - /* SALT = 0x00000000000000000000000000000000746D7031 */ + /* SALT = 0x000000000000000000000000746D7031 */ const u8 salt[16] = { 0x31, 0x70, 0x6d, 0x74 }; if (smp_h7(smp->tfm_cmac, smp->tk, salt, smp->link_key)) { @@ -1203,7 +1203,7 @@ static void sc_generate_ltk(struct smp_chan *smp) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); if (test_bit(SMP_FLAG_CT2, &smp->flags)) { - /* SALT = 0x00000000000000000000000000000000746D7032 */ + /* SALT = 0x000000000000000000000000746D7032 */ const u8 salt[16] = { 0x32, 0x70, 0x6d, 0x74 }; if (smp_h7(smp->tfm_cmac, key->val, salt, smp->tk)) @@ -2115,7 +2115,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; struct hci_conn *hcon = conn->hcon; - u8 *pkax, *pkbx, *na, *nb; + u8 *pkax, *pkbx, *na, *nb, confirm_hint; u32 passkey; int err; @@ -2168,6 +2168,24 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); + + /* Only Just-Works pairing requires extra checks */ + if (smp->method != JUST_WORKS) + goto mackey_and_ltk; + + /* If there already exists long term key in local host, leave + * the decision to user space since the remote device could + * be legitimate or malicious. + */ + if (hci_find_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, + hcon->role)) { + /* Set passkey to 0. The value can be any number since + * it'll be ignored anyway. 
+ */ + passkey = 0; + confirm_hint = 1; + goto confirm; + } } mackey_and_ltk: @@ -2188,8 +2206,11 @@ mackey_and_ltk: if (err) return SMP_UNSPECIFIED; + confirm_hint = 0; + +confirm: err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, hcon->type, - hcon->dst_type, passkey, 0); + hcon->dst_type, passkey, confirm_hint); if (err) return SMP_UNSPECIFIED; diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index afee292fb004..162998e2f039 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -26,8 +26,8 @@ static size_t __get_vlan_tinfo_size(void) nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */ } -static bool vlan_tunid_inrange(struct net_bridge_vlan *v_curr, - struct net_bridge_vlan *v_last) +bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *v_last) { __be32 tunid_curr = tunnel_id_to_key32(v_curr->tinfo.tunnel_id); __be32 tunid_last = tunnel_id_to_key32(v_last->tinfo.tunnel_id); @@ -193,8 +193,8 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 }, }; -static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, - u16 vid, u32 tun_id, bool *changed) +int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id, bool *changed) { int err = 0; @@ -250,8 +250,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, return 0; } -int br_process_vlan_tunnel_info(struct net_bridge *br, - struct net_bridge_port *p, int cmd, +int br_process_vlan_tunnel_info(const struct net_bridge *br, + const struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, struct vtunnel_info *tinfo_last, bool *changed) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 5153ffe79a01..1f97703a52ff 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1199,8 +1199,8 @@ static inline void br_vlan_notify(const struct net_bridge *br, /* br_vlan_options.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING -bool br_vlan_opts_eq(const struct net_bridge_vlan *v1, - const struct net_bridge_vlan *v2); +bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end); bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v); size_t br_vlan_opts_nl_size(void); int br_vlan_process_options(const struct net_bridge *br, diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h index 2bdef2ea3420..c54cc26211d7 100644 --- a/net/bridge/br_private_tunnel.h +++ b/net/bridge/br_private_tunnel.h @@ -18,8 +18,8 @@ struct vtunnel_info { /* br_netlink_tunnel.c */ int br_parse_vlan_tunnel_info(struct nlattr *attr, struct vtunnel_info *tinfo); -int br_process_vlan_tunnel_info(struct net_bridge *br, - struct net_bridge_port *p, +int br_process_vlan_tunnel_info(const struct net_bridge *br, + const struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, struct vtunnel_info *tinfo_last, @@ -32,8 +32,9 @@ int br_fill_vlan_tunnel_info(struct sk_buff *skb, /* br_vlan_tunnel.c */ int vlan_tunnel_init(struct net_bridge_vlan_group *vg); void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg); -int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid); -int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id); +int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid); +int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, + u32 
tun_id); void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port); void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan); @@ -42,19 +43,23 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan_group *vg); int br_handle_egress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan *vlan); +bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *v_last); +int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id, bool *changed); #else static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg) { return 0; } -static inline int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, +static inline int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid) { return 0; } -static inline int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, +static inline int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, u32 tun_id) { return 0; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 6b5deca08b89..f9092c71225f 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1569,10 +1569,41 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event) } } +static bool br_vlan_stats_fill(struct sk_buff *skb, + const struct net_bridge_vlan *v) +{ + struct br_vlan_stats stats; + struct nlattr *nest; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_STATS); + if (!nest) + return false; + + br_vlan_get_stats(v, &stats); + if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, stats.rx_bytes, + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS, + stats.rx_packets, BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, stats.tx_bytes, + BRIDGE_VLANDB_STATS_PAD) || + nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS, + stats.tx_packets, BRIDGE_VLANDB_STATS_PAD)) + goto out_err; + + nla_nest_end(skb, nest); + + return true; + +out_err: + nla_nest_cancel(skb, nest); + return false; +} + /* v_opts is used to dump the options which must be equal in the whole range */ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range, const struct net_bridge_vlan *v_opts, - u16 flags) + u16 flags, + bool dump_stats) { struct bridge_vlan_info info; struct nlattr *nest; @@ -1596,8 +1627,13 @@ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range, nla_put_u16(skb, BRIDGE_VLANDB_ENTRY_RANGE, vid_range)) goto out_err; - if (v_opts && !br_vlan_opts_fill(skb, v_opts)) - goto out_err; + if (v_opts) { + if (!br_vlan_opts_fill(skb, v_opts)) + goto out_err; + + if (dump_stats && !br_vlan_stats_fill(skb, v_opts)) + goto out_err; + } nla_nest_end(skb, nest); @@ -1675,7 +1711,7 @@ void br_vlan_notify(const struct net_bridge *br, goto out_kfree; } - if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags)) + if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags, false)) goto out_err; nlmsg_end(skb, nlh); @@ -1694,14 +1730,16 @@ bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, { return v_curr->vid - range_end->vid == 1 && range_end->flags == v_curr->flags && - br_vlan_opts_eq(v_curr, range_end); + br_vlan_opts_eq_range(v_curr, range_end); } static int br_vlan_dump_dev(const struct net_device *dev, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + u32 dump_flags) { struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL; + bool dump_stats = !!(dump_flags & 
BRIDGE_VLANDB_DUMPF_STATS); struct net_bridge_vlan_group *vg; int idx = 0, s_idx = cb->args[1]; struct nlmsghdr *nlh = NULL; @@ -1754,12 +1792,13 @@ static int br_vlan_dump_dev(const struct net_device *dev, continue; } - if (v->vid == pvid || !br_vlan_can_enter_range(v, range_end)) { - u16 flags = br_vlan_flags(range_start, pvid); + if (dump_stats || v->vid == pvid || + !br_vlan_can_enter_range(v, range_end)) { + u16 vlan_flags = br_vlan_flags(range_start, pvid); if (!br_vlan_fill_vids(skb, range_start->vid, range_end->vid, range_start, - flags)) { + vlan_flags, dump_stats)) { err = -EMSGSIZE; break; } @@ -1778,7 +1817,8 @@ static int br_vlan_dump_dev(const struct net_device *dev, */ if (!err && range_start && !br_vlan_fill_vids(skb, range_start->vid, range_end->vid, - range_start, br_vlan_flags(range_start, pvid))) + range_start, br_vlan_flags(range_start, pvid), + dump_stats)) err = -EMSGSIZE; cb->args[1] = err ? idx : 0; @@ -1788,18 +1828,27 @@ static int br_vlan_dump_dev(const struct net_device *dev, return err; } +static const struct nla_policy br_vlan_db_dump_pol[BRIDGE_VLANDB_DUMP_MAX + 1] = { + [BRIDGE_VLANDB_DUMP_FLAGS] = { .type = NLA_U32 }, +}; + static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *dtb[BRIDGE_VLANDB_DUMP_MAX + 1]; int idx = 0, err = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); struct br_vlan_msg *bvm; struct net_device *dev; + u32 dump_flags = 0; - err = nlmsg_parse(cb->nlh, sizeof(*bvm), NULL, 0, NULL, cb->extack); + err = nlmsg_parse(cb->nlh, sizeof(*bvm), dtb, BRIDGE_VLANDB_DUMP_MAX, + br_vlan_db_dump_pol, cb->extack); if (err < 0) return err; bvm = nlmsg_data(cb->nlh); + if (dtb[BRIDGE_VLANDB_DUMP_FLAGS]) + dump_flags = nla_get_u32(dtb[BRIDGE_VLANDB_DUMP_FLAGS]); rcu_read_lock(); if (bvm->ifindex) { @@ -1808,7 +1857,7 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) err = -ENODEV; goto out_err; } - err = br_vlan_dump_dev(dev, skb, cb); + err = br_vlan_dump_dev(dev, skb, cb, dump_flags); if (err && err != -EMSGSIZE) goto out_err; } else { @@ -1816,7 +1865,7 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto skip; - err = br_vlan_dump_dev(dev, skb, cb); + err = br_vlan_dump_dev(dev, skb, cb, dump_flags); if (err == -EMSGSIZE) break; skip: @@ -1839,6 +1888,7 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = .len = sizeof(struct bridge_vlan_info) }, [BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 }, [BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 }, + [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED }, }; static int br_vlan_rtm_process_one(struct net_device *dev, diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index cd2eb194eb98..b4add9ea8964 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -4,25 +4,58 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <net/ip_tunnels.h> #include "br_private.h" +#include "br_private_tunnel.h" -/* check if the options between two vlans are equal */ -bool br_vlan_opts_eq(const struct net_bridge_vlan *v1, - const struct net_bridge_vlan *v2) +static bool __vlan_tun_put(struct sk_buff *skb, const struct net_bridge_vlan *v) { - return v1->state == v2->state; + __be32 tid = tunnel_id_to_key32(v->tinfo.tunnel_id); + struct nlattr *nest; + + if (!v->tinfo.tunnel_dst) + return true; + + nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY_TUNNEL_INFO); + if (!nest) 
+ return false; + if (nla_put_u32(skb, BRIDGE_VLANDB_TINFO_ID, be32_to_cpu(tid))) { + nla_nest_cancel(skb, nest); + return false; + } + nla_nest_end(skb, nest); + + return true; +} + +static bool __vlan_tun_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return (!v_curr->tinfo.tunnel_dst && !range_end->tinfo.tunnel_dst) || + vlan_tunid_inrange(v_curr, range_end); +} + +/* check if the options' state of v_curr allow it to enter the range */ +bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return v_curr->state == range_end->state && + __vlan_tun_can_enter_range(v_curr, range_end); } bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v) { return !nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, - br_vlan_get_state(v)); + br_vlan_get_state(v)) && + __vlan_tun_put(skb, v); } size_t br_vlan_opts_nl_size(void) { - return nla_total_size(sizeof(u8)); /* BRIDGE_VLANDB_ENTRY_STATE */ + return nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_STATE */ + + nla_total_size(0) /* BRIDGE_VLANDB_ENTRY_TUNNEL_INFO */ + + nla_total_size(sizeof(u32)); /* BRIDGE_VLANDB_TINFO_ID */ } static int br_vlan_modify_state(struct net_bridge_vlan_group *vg, @@ -62,6 +95,68 @@ static int br_vlan_modify_state(struct net_bridge_vlan_group *vg, return 0; } +static const struct nla_policy br_vlandb_tinfo_pol[BRIDGE_VLANDB_TINFO_MAX + 1] = { + [BRIDGE_VLANDB_TINFO_ID] = { .type = NLA_U32 }, + [BRIDGE_VLANDB_TINFO_CMD] = { .type = NLA_U32 }, +}; + +static int br_vlan_modify_tunnel(const struct net_bridge_port *p, + struct net_bridge_vlan *v, + struct nlattr **tb, + bool *changed, + struct netlink_ext_ack *extack) +{ + struct nlattr *tun_tb[BRIDGE_VLANDB_TINFO_MAX + 1], *attr; + struct bridge_vlan_info *vinfo; + u32 tun_id = 0; + int cmd, err; + + if (!p) { + NL_SET_ERR_MSG_MOD(extack, "Can't modify tunnel mapping of non-port vlans"); + return -EINVAL; + } + if (!(p->flags & BR_VLAN_TUNNEL)) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't have tunnel flag set"); + return -EINVAL; + } + + attr = tb[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO]; + err = nla_parse_nested(tun_tb, BRIDGE_VLANDB_TINFO_MAX, attr, + br_vlandb_tinfo_pol, extack); + if (err) + return err; + + if (!tun_tb[BRIDGE_VLANDB_TINFO_CMD]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel command attribute"); + return -ENOENT; + } + cmd = nla_get_u32(tun_tb[BRIDGE_VLANDB_TINFO_CMD]); + switch (cmd) { + case RTM_SETLINK: + if (!tun_tb[BRIDGE_VLANDB_TINFO_ID]) { + NL_SET_ERR_MSG_MOD(extack, "Missing tunnel id attribute"); + return -ENOENT; + } + /* when working on vlan ranges this is the starting tunnel id */ + tun_id = nla_get_u32(tun_tb[BRIDGE_VLANDB_TINFO_ID]); + /* vlan info attr is guaranteed by br_vlan_rtm_process_one */ + vinfo = nla_data(tb[BRIDGE_VLANDB_ENTRY_INFO]); + /* tunnel ids are mapped to each vlan in increasing order, + * the starting vlan is in BRIDGE_VLANDB_ENTRY_INFO and v is the + * current vlan, so we compute: tun_id + v - vinfo->vid + */ + tun_id += v->vid - vinfo->vid; + break; + case RTM_DELLINK: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel command"); + return -EINVAL; + } + + return br_vlan_tunnel_info(p, cmd, v->vid, tun_id, changed); +} + static int br_vlan_process_one_opts(const struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_vlan_group *vg, @@ -80,6 +175,11 @@ static int br_vlan_process_one_opts(const struct net_bridge *br, if (err) return err; } + if 
(tb[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO]) { + err = br_vlan_modify_tunnel(p, v, tb, changed, extack); + if (err) + return err; + } return 0; } diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index d13d2080f527..169e005fbda2 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -89,7 +89,8 @@ out: /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ -int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id) +int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, + u32 tun_id) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; @@ -107,7 +108,7 @@ int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id) /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. */ -int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid) +int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index e1256e03a9a8..78db58c7aec2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1561,7 +1561,7 @@ struct compat_ebt_entry_mwt { compat_uptr_t ptr; } u; compat_uint_t match_size; - compat_uint_t data[0] __attribute__ ((aligned (__alignof__(struct compat_ebt_replace)))); + compat_uint_t data[] __aligned(__alignof__(struct compat_ebt_replace)); }; /* account for possible padding between match_size and ->data */ diff --git a/net/core/dev.c b/net/core/dev.c index d84541c24446..021e18251465 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9283,6 +9283,10 @@ int register_netdevice(struct net_device *dev) BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); BUG_ON(!net); + ret = ethtool_check_ops(dev->ethtool_ops); + if (ret) + return ret; + spin_lock_init(&dev->addr_list_lock); lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c5beb3031a72..5f782fa3029f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -861,8 +861,8 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!flow_offload_has_one_action(&cls->rule->action)) return err; - if (!flow_action_basic_hw_stats_types_check(&cls->rule->action, - cls->common.extack)) + if (!flow_action_basic_hw_stats_check(&cls->rule->action, + cls->common.extack)) return err; act = &cls->rule->action.entries[0]; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0b22741b2f8f..dab047eec943 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -289,3 +289,14 @@ out: kfree(indir); return ret; } + +int ethtool_check_ops(const struct ethtool_ops *ops) +{ + if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params)) + return -EINVAL; + /* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime, + * the fact that ops are checked at registration time does not + * mean the ops attached to a netdev later on are sane. 
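ethtool_check_ops() above moves a per-call guard to registration time: a driver that implements .set_coalesce without declaring supported_coalesce_params now fails register_netdevice() outright, which is why the ioctl path below it can drop its early return for an empty mask. A toy model of the contract being enforced, with invented names rather than the kernel structs:

    struct toy_ethtool_ops {
            int (*set_coalesce)(void *dev);
            unsigned int supported_coalesce_params; /* bitmask, 0 = none */
    };

    static int toy_check_ops(const struct toy_ethtool_ops *ops)
    {
            if (ops->set_coalesce && !ops->supported_coalesce_params)
                    return -1;      /* -EINVAL in the kernel */
            return 0;
    }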
+ */ + return 0; +} diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 258840b19fb5..3852a58d7f95 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1519,9 +1519,6 @@ ethtool_set_coalesce_supported(struct net_device *dev, u32 supported_params = dev->ethtool_ops->supported_coalesce_params; u32 nonzero_params = 0; - if (!supported_params) - return true; - if (coalesce->rx_coalesce_usecs) nonzero_params |= ETHTOOL_COALESCE_RX_USECS; if (coalesce->rx_max_coalesced_frames) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f1f78a742b36..b167f4a5b684 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1057,7 +1057,7 @@ struct compat_arpt_replace { u32 underflow[NF_ARP_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; - struct compat_arpt_entry entries[0]; + struct compat_arpt_entry entries[]; }; static inline void compat_release_entry(struct compat_arpt_entry *e) @@ -1383,7 +1383,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, struct compat_arpt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_arpt_entry entrytable[0]; + struct compat_arpt_entry entrytable[]; }; static int compat_get_entries(struct net *net, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 10b91ebdf213..c2670eaa74e6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1211,7 +1211,7 @@ struct compat_ipt_replace { u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ - struct compat_ipt_entry entries[0]; + struct compat_ipt_entry entries[]; }; static int @@ -1562,7 +1562,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, struct compat_ipt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_ipt_entry entrytable[0]; + struct compat_ipt_entry entrytable[]; }; static int diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 645cc3009e64..f5f588b1f6e9 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -145,12 +145,13 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else { - bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt, 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; } + bictcp_update(ca, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, ca->cnt, acked); } /* diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 471571e1ab26..6cebf412d590 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -10,10 +10,9 @@ #include <net/tcp.h> /* These factors derived from the recommended values in the aer: - * .01 and and 7/8. We use 50 instead of 100 to account for - * delayed ack. + * .01 and and 7/8. 
*/ -#define TCP_SCALABLE_AI_CNT 50U +#define TCP_SCALABLE_AI_CNT 100U #define TCP_SCALABLE_MD_SCALE 3 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -23,11 +22,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), - 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 3b36bb1a0dda..50a9a6e2c4cd 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -153,31 +153,34 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; if (tcp_in_slow_start(tp)) { - /* Slow start. */ - tcp_slow_start(tp, acked); + /* Slow start. */ + acked = tcp_slow_start(tp, acked); + if (!acked) + goto done; + } + + /* Congestion avoidance. */ + if (veno->diff < beta) { + /* In the "non-congestive state", increase cwnd + * every rtt. + */ + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } else { - /* Congestion avoidance. */ - if (veno->diff < beta) { - /* In the "non-congestive state", increase cwnd - * every rtt. - */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); - } else { - /* In the "congestive state", increase cwnd - * every other rtt. - */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (veno->inc && - tp->snd_cwnd < tp->snd_cwnd_clamp) { - tp->snd_cwnd++; - veno->inc = 0; - } else - veno->inc = 1; - tp->snd_cwnd_cnt = 0; + /* In the "congestive state", increase cwnd + * every other rtt. 
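The tcp_bic, tcp_scalable, tcp_veno and tcp_yeah hunks in this stretch all converge on the same pattern: tcp_slow_start() consumes only the ACKs needed to reach ssthresh and returns the remainder, which is then fed to tcp_cong_avoid_ai() instead of a hard-coded 1, so an ACK batch that crosses ssthresh is no longer partially discarded. A standalone simulation of the two helpers' semantics, simplified and without the cwnd clamp:

    #include <stdio.h>

    struct sim_tp { unsigned int cwnd, ssthresh, cwnd_cnt; };

    /* Grow cwnd toward ssthresh and return the ACKs left over. */
    static unsigned int sim_slow_start(struct sim_tp *tp, unsigned int acked)
    {
            unsigned int cwnd = tp->cwnd + acked;

            if (cwnd > tp->ssthresh) {
                    acked = cwnd - tp->ssthresh;    /* leftover for AI */
                    cwnd = tp->ssthresh;
            } else {
                    acked = 0;
            }
            tp->cwnd = cwnd;
            return acked;
    }

    /* Additive increase: roughly acked/w of a segment per call. */
    static void sim_cong_avoid_ai(struct sim_tp *tp, unsigned int w,
                                  unsigned int acked)
    {
            tp->cwnd_cnt += acked;
            if (tp->cwnd_cnt >= w) {
                    tp->cwnd += tp->cwnd_cnt / w;
                    tp->cwnd_cnt %= w;
            }
    }

    int main(void)
    {
            struct sim_tp tp = { .cwnd = 9, .ssthresh = 10, .cwnd_cnt = 0 };
            unsigned int left = sim_slow_start(&tp, 3); /* 9 -> 10, 2 left */

            sim_cong_avoid_ai(&tp, tp.cwnd, left);      /* 2 banked for AI */
            printf("cwnd=%u cnt=%u\n", tp.cwnd, tp.cwnd_cnt); /* cwnd=10 cnt=2 */
            return 0;
    }

Starting from cwnd 9 with ssthresh 10, a 3-segment ACK spends one segment finishing slow start and banks the other two toward additive increase.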
+ */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (veno->inc && + tp->snd_cwnd < tp->snd_cwnd_clamp) { + tp->snd_cwnd++; + veno->inc = 0; } else - tp->snd_cwnd_cnt++; - } + veno->inc = 1; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt += acked; } +done: if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e00570dd0a69..3bb448761ca3 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -36,8 +36,6 @@ struct yeah { u32 reno_count; u32 fast_count; - - u32 pkts_acked; }; static void tcp_yeah_init(struct sock *sk) @@ -57,18 +55,6 @@ static void tcp_yeah_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void tcp_yeah_pkts_acked(struct sock *sk, - const struct ack_sample *sample) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct yeah *yeah = inet_csk_ca(sk); - - if (icsk->icsk_ca_state == TCP_CA_Open) - yeah->pkts_acked = sample->pkts_acked; - - tcp_vegas_pkts_acked(sk, sample); -} - static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -77,24 +63,19 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + goto do_vegas; + } - else if (!yeah->doing_reno_now) { + if (!yeah->doing_reno_now) { /* Scalable */ - - tp->snd_cwnd_cnt += yeah->pkts_acked; - if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } - - yeah->pkts_acked = 1; - + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. @@ -118,7 +99,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) * of bytes we send in an RTT is often less than our cwnd will allow. * So we keep track of our cwnd separately, in v_beg_snd_cwnd. 
*/ - +do_vegas: if (after(ack, yeah->vegas.beg_snd_nxt)) { /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -232,7 +213,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = { .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, - .pkts_acked = tcp_yeah_pkts_acked, + .pkts_acked = tcp_vegas_pkts_acked, .owner = THIS_MODULE, .name = "yeah", diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c973ace208c5..e27393498ecb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1227,7 +1227,7 @@ struct compat_ip6t_replace { u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ - struct compat_ip6t_entry entries[0]; + struct compat_ip6t_entry entries[]; }; static int @@ -1571,7 +1571,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, struct compat_ip6t_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_ip6t_entry entrytable[0]; + struct compat_ip6t_entry entrytable[]; }; static int diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 370da2f80e3c..25c1007f1098 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -261,7 +261,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) aggregate_strp_stats(&knet->aggregate_strp_stats, &strp_stats); - list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) { + list_for_each_entry(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 63c8ee49cef2..55f3ce7638a0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -259,11 +259,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, struct mptcp_ext *mpext; unsigned int data_len; - pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow, - subflow->fourth_ack, subflow->snd_isn, + pr_debug("subflow=%p fully established=%d seq=%x:%x remaining=%d", + subflow, subflow->fully_established, subflow->snd_isn, skb ? TCP_SKB_CB(skb)->seq : 0, remaining); - if (subflow->mp_capable && !subflow->fourth_ack && skb && + if (subflow->mp_capable && !subflow->fully_established && skb && subflow->snd_isn == TCP_SKB_CB(skb)->seq) { /* When skb is not available, we better over-estimate the * emitted options len. 
A full DSS option is longer than @@ -429,19 +429,19 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, return false; } -static bool check_fourth_ack(struct mptcp_subflow_context *subflow, - struct sk_buff *skb, - struct mptcp_options_received *mp_opt) +static bool check_fully_established(struct mptcp_subflow_context *subflow, + struct sk_buff *skb, + struct mptcp_options_received *mp_opt) { /* here we can process OoO, in-window pkts, only in-sequence 4th ack * are relevant */ - if (likely(subflow->fourth_ack || + if (likely(subflow->fully_established || TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)) return true; if (mp_opt->use_ack) - subflow->fourth_ack = 1; + subflow->fully_established = 1; if (subflow->can_ack) return true; @@ -467,7 +467,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, struct mptcp_ext *mpext; mp_opt = &opt_rx->mptcp; - if (!check_fourth_ack(subflow, skb, mp_opt)) + if (!check_fully_established(subflow, skb, mp_opt)) return; if (!mp_opt->dss) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 04c3caed92df..e959104832ef 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -861,6 +861,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) ack_seq++; msk->ack_seq = ack_seq; } + + /* will be fully established after successful MPC subflow creation */ + inet_sk_state_store(nsk, TCP_SYN_RECV); bh_unlock_sock(nsk); /* keep a single reference */ @@ -916,10 +919,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); - /* will be fully established at mptcp_stream_accept() - * completion. - */ - inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV); bh_unlock_sock(new_mptcp_sock); local_bh_enable(); } @@ -1256,8 +1255,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } - - inet_sk_state_store(newsock->sk, TCP_ESTABLISHED); } sock_put(ssock->sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 9baf6fcba914..eb3f65264a40 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -119,7 +119,7 @@ struct mptcp_subflow_context { u32 map_data_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ mp_capable : 1, /* remote is MPTCP capable */ - fourth_ack : 1, /* send initial DSS */ + fully_established : 1, /* path validated */ conn_finished : 1, map_valid : 1, mpc_map : 1, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8434c7f5f712..e1faa88855bf 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -234,6 +234,8 @@ create_child: /* new mpc subflow takes ownership of the newly * created mptcp socket */ + inet_sk_state_store((struct sock *)new_msk, + TCP_ESTABLISHED); ctx->conn = new_msk; new_msk = NULL; } @@ -800,7 +802,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->tcp_sock = newsk; new_ctx->mp_capable = 1; - new_ctx->fourth_ack = subflow_req->remote_key_valid; + new_ctx->fully_established = subflow_req->remote_key_valid; new_ctx->can_ack = subflow_req->remote_key_valid; new_ctx->remote_key = subflow_req->remote_key; new_ctx->local_key = subflow_req->local_key; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 91efae88e8c2..468fea1aebba 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -455,14 +455,6 @@ config NF_TABLES To compile it as a module, choose M here. 
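The [0]-to-[] conversions running through the ebtables, arp_tables, ip_tables and ip6_tables hunks above and the ipset ones below swap GNU zero-length trailing arrays for C99 flexible array members; the layout is unchanged, but the compiler and bounds checkers can now tell the array is intentionally unbounded. A minimal standalone example of the idiom, with an invented struct loosely modeled on hbucket:

    #include <stdlib.h>

    struct bucket {
            unsigned char size;     /* elements stored in value[] */
            unsigned char pos;
            unsigned char value[];  /* flexible array member, was value[0] */
    };

    static struct bucket *bucket_alloc(unsigned char n, size_t elem_size)
    {
            /* sizeof(*b) excludes value[], so the payload is added
             * explicitly, exactly as with the old zero-length idiom.
             */
            struct bucket *b = calloc(1, sizeof(*b) + (size_t)n * elem_size);

            if (b)
                    b->size = n;
            return b;
    }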
if NF_TABLES - -config NF_TABLES_SET - tristate "Netfilter nf_tables set infrastructure" - help - This option enables the nf_tables set infrastructure that allows to - look up for elements in a set and to build one-way mappings between - matchings and actions. - config NF_TABLES_INET depends on IPV6 select NF_TABLES_IPV4 diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3f572e5a975e..292e71dc7ba4 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -78,14 +78,17 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \ - nft_chain_route.o nf_tables_offload.o + nft_chain_route.o nf_tables_offload.o \ + nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ + nft_set_pipapo.o -nf_tables_set-objs := nf_tables_set_core.o \ - nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ - nft_set_pipapo.o +ifdef CONFIG_X86_64 +ifneq (,$(findstring -DCONFIG_AS_AVX2=1,$(KBUILD_CFLAGS))) +nf_tables-objs += nft_set_pipapo_avx2.o +endif +endif obj-$(CONFIG_NF_TABLES) += nf_tables.o -obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 0a2196f59106..486959f70cf3 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -46,7 +46,7 @@ struct bitmap_ip { u8 netmask; /* subnet netmask */ struct timer_list gc; /* garbage collection */ struct ip_set *set; /* attached to this ip_set */ - unsigned char extensions[0] /* data extensions */ + unsigned char extensions[] /* data extensions */ __aligned(__alignof__(u64)); }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 739e343efaf6..2310a316e0af 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -49,7 +49,7 @@ struct bitmap_ipmac { size_t memsize; /* members size */ struct timer_list gc; /* garbage collector */ struct ip_set *set; /* attached to this ip_set */ - unsigned char extensions[0] /* MAC + data extensions */ + unsigned char extensions[] /* MAC + data extensions */ __aligned(__alignof__(u64)); }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b49978dd810d..e56ced66f202 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -37,7 +37,7 @@ struct bitmap_port { size_t memsize; /* members size */ struct timer_list gc; /* garbage collection */ struct ip_set *set; /* attached to this ip_set */ - unsigned char extensions[0] /* data extensions */ + unsigned char extensions[] /* data extensions */ __aligned(__alignof__(u64)); }; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e52d7b7597a0..1ee43752d6d3 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -76,7 +76,7 @@ struct hbucket { DECLARE_BITMAP(used, AHASH_MAX_TUNED); u8 size; /* size of the array */ u8 pos; /* position of the first free entry */ - unsigned char value[0] /* the array of the values */ + unsigned char value[] /* the array of the values */ __aligned(__alignof__(u64)); }; @@ -109,7 +109,7 @@ struct htable { u8 htable_bits; /* size of 
hash table == 2^htable_bits */ u32 maxelem; /* Maxelem per region */ struct ip_set_region *hregion; /* Region locks and ext sizes */ - struct hbucket __rcu *bucket[0]; /* hashtable buckets */ + struct hbucket __rcu *bucket[]; /* hashtable buckets */ }; #define hbucket(h, i) ((h)->bucket[i]) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1927fc296f95..a18f8fe728e3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2633,7 +2633,6 @@ void nf_conntrack_init_end(void) */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) #define DYING_NULLS_VAL ((1<<30)+1) -#define TEMPLATE_NULLS_VAL ((1<<30)+2) int nf_conntrack_init_net(struct net *net) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4912069627b6..9b57330c81f8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1054,21 +1054,18 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) nf_conntrack_standalone_init_dccp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); - /* Don't export sysctls to unprivileged users */ + /* Don't allow unprivileged users to alter certain sysctls */ if (net->user_ns != &init_user_ns) { - table[NF_SYSCTL_CT_MAX].procname = NULL; - table[NF_SYSCTL_CT_ACCT].procname = NULL; - table[NF_SYSCTL_CT_HELPER].procname = NULL; -#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP - table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL; -#endif + table[NF_SYSCTL_CT_MAX].mode = 0444; + table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444; + table[NF_SYSCTL_CT_HELPER].mode = 0444; #ifdef CONFIG_NF_CONNTRACK_EVENTS - table[NF_SYSCTL_CT_EVENTS].procname = NULL; + table[NF_SYSCTL_CT_EVENTS].mode = 0444; #endif - } - - if (!net_eq(&init_net, net)) table[NF_SYSCTL_CT_BUCKETS].mode = 0444; + } else if (!net_eq(&init_net, net)) { + table[NF_SYSCTL_CT_BUCKETS].mode = 0444; + } net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 42b73a084a63..ad549317af30 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -7,6 +7,7 @@ #include <linux/tc_act/tc_csum.h> #include <net/flow_offload.h> #include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> @@ -27,11 +28,61 @@ struct flow_offload_work { (__match)->dissector.offset[__type] = \ offsetof(struct nf_flow_key, __field) +static void nf_flow_rule_lwt_match(struct nf_flow_match *match, + struct ip_tunnel_info *tun_info) +{ + struct nf_flow_key *mask = &match->mask; + struct nf_flow_key *key = &match->key; + unsigned int enc_keys; + + if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX)) + return; + + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); + key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id); + mask->enc_key_id.keyid = 0xffffffff; + enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL); + + if (ip_tunnel_info_af(tun_info) == AF_INET) { + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + enc_ipv4); + key->enc_ipv4.src = tun_info->key.u.ipv4.dst; + key->enc_ipv4.dst = tun_info->key.u.ipv4.src; + if (key->enc_ipv4.src) + 
mask->enc_ipv4.src = 0xffffffff; + if (key->enc_ipv4.dst) + mask->enc_ipv4.dst = 0xffffffff; + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + } else { + memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst, + sizeof(struct in6_addr)); + memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src, + sizeof(struct in6_addr)); + if (memcmp(&key->enc_ipv6.src, &in6addr_any, + sizeof(struct in6_addr))) + memset(&key->enc_ipv6.src, 0xff, + sizeof(struct in6_addr)); + if (memcmp(&key->enc_ipv6.dst, &in6addr_any, + sizeof(struct in6_addr))) + memset(&key->enc_ipv6.dst, 0xff, + sizeof(struct in6_addr)); + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS); + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + } + + match->dissector.used_keys |= enc_keys; +} + static int nf_flow_rule_match(struct nf_flow_match *match, - const struct flow_offload_tuple *tuple) + const struct flow_offload_tuple *tuple, + struct dst_entry *other_dst) { struct nf_flow_key *mask = &match->mask; struct nf_flow_key *key = &match->key; + struct ip_tunnel_info *tun_info; NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); @@ -41,6 +92,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match, NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp); + if (other_dst->lwtstate) { + tun_info = lwt_tun_info(other_dst->lwtstate); + nf_flow_rule_lwt_match(match, tun_info); + } + key->meta.ingress_ifindex = tuple->iifidx; mask->meta.ingress_ifindex = 0xffffffff; @@ -419,10 +475,52 @@ static void flow_offload_redirect(const struct flow_offload *flow, dev_hold(rt->dst.dev); } +static void flow_offload_encap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action_entry *entry; + struct dst_entry *dst; + + dst = flow->tuplehash[dir].tuple.dst_cache; + if (dst->lwtstate) { + struct ip_tunnel_info *tun_info; + + tun_info = lwt_tun_info(dst->lwtstate); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { + entry = flow_action_entry_next(flow_rule); + entry->id = FLOW_ACTION_TUNNEL_ENCAP; + entry->tunnel = tun_info; + } + } +} + +static void flow_offload_decap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action_entry *entry; + struct dst_entry *dst; + + dst = flow->tuplehash[!dir].tuple.dst_cache; + if (dst->lwtstate) { + struct ip_tunnel_info *tun_info; + + tun_info = lwt_tun_info(dst->lwtstate); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { + entry = flow_action_entry_next(flow_rule); + entry->id = FLOW_ACTION_TUNNEL_DECAP; + } + } +} + int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + flow_offload_decap_tunnel(flow, dir, flow_rule); + flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; @@ -449,6 +547,9 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + flow_offload_decap_tunnel(flow, dir, flow_rule); + flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) 
< 0) return -1; @@ -479,6 +580,7 @@ nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload *flow = offload->flow; const struct flow_offload_tuple *tuple; struct nf_flow_rule *flow_rule; + struct dst_entry *other_dst; int err = -ENOMEM; flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL); @@ -494,7 +596,8 @@ nf_flow_offload_rule_alloc(struct net *net, flow_rule->rule->match.key = &flow_rule->match.key; tuple = &flow->tuplehash[dir].tuple; - err = nf_flow_rule_match(&flow_rule->match, tuple); + other_dst = flow->tuplehash[!dir].tuple.dst_cache; + err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst); if (err < 0) goto err_flow_match; @@ -574,6 +677,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct nf_flow_rule *flow_rule, enum flow_offload_tuple_dir dir, int priority, int cmd, + struct flow_stats *stats, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; @@ -598,6 +702,9 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, } mutex_unlock(&flowtable->flow_block_lock); + if (cmd == FLOW_CLS_STATS) + memcpy(stats, &cls_flow.stats, sizeof(*stats)); + return i; } @@ -607,7 +714,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload, { return nf_flow_offload_tuple(offload->flowtable, offload->flow, flow_rule, dir, offload->priority, - FLOW_CLS_REPLACE, + FLOW_CLS_REPLACE, NULL, &offload->flowtable->flow_block.cb_list); } @@ -615,7 +722,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, - offload->priority, FLOW_CLS_DESTROY, + offload->priority, FLOW_CLS_DESTROY, NULL, &offload->flowtable->flow_block.cb_list); } @@ -661,21 +768,9 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir, struct flow_stats *stats) { - struct nf_flowtable *flowtable = offload->flowtable; - struct flow_cls_offload cls_flow = {}; - struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; - __be16 proto = ETH_P_ALL; - - nf_flow_offload_init(&cls_flow, proto, offload->priority, - FLOW_CLS_STATS, - &offload->flow->tuplehash[dir].tuple, &extack); - - mutex_lock(&flowtable->flow_block_lock); - list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) - block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); - mutex_unlock(&flowtable->flow_block_lock); - memcpy(stats, &cls_flow.stats, sizeof(*stats)); + nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, + offload->priority, FLOW_CLS_STATS, stats, + &offload->flowtable->flow_block.cb_list); } static void flow_offload_work_stats(struct flow_offload_work *offload) @@ -820,25 +915,47 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, return err; } -static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, - struct nf_flowtable *flowtable, - struct net_device *dev, - enum flow_block_command cmd, - struct netlink_ext_ack *extack) +static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, + struct net *net, + enum flow_block_command cmd, + struct nf_flowtable *flowtable, + struct netlink_ext_ack *extack) { - int err; - - if (!dev->netdev_ops->ndo_setup_tc) - return -EOPNOTSUPP; - memset(bo, 0, sizeof(*bo)); - bo->net = dev_net(dev); + bo->net = net; bo->block = &flowtable->flow_block; bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; INIT_LIST_HEAD(&bo->cb_list); +} 
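Note how the nf_flow_rule_lwt_match() hunk above builds the encap match from the *other* direction's dst entry, with tunnel source and destination swapped (key->enc_ipv4.src takes tun_info->key.u.ipv4.dst and vice versa): packets arriving on the reverse path carry the tunnel header mirrored relative to the transmit tunnel info. A standalone sketch of just that swap, with types simplified to plain integers rather than the kernel structures::

    #include <stdio.h>
    #include <stdint.h>

    struct tun_key { uint32_t src, dst, id; };

    /* Build the match key for one direction from the reverse
     * direction's transmit tunnel info: src/dst swapped, id copied.
     */
    static struct tun_key reverse_match(const struct tun_key *tx)
    {
            struct tun_key m = { .src = tx->dst, .dst = tx->src, .id = tx->id };

            return m;
    }

    int main(void)
    {
            struct tun_key tx = { .src = 0x0a000001, .dst = 0x0a000002, .id = 42 };
            struct tun_key m = reverse_match(&tx);

            printf("match src=%08x dst=%08x id=%u\n",
                   (unsigned)m.src, (unsigned)m.dst, (unsigned)m.id);
            return 0;
    }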
+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo, + struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd, + struct netlink_ext_ack *extack) +{ + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, + extack); + flow_indr_block_call(dev, bo, cmd); + + if (list_empty(&bo->cb_list)) + return -EOPNOTSUPP; + + return 0; +} + +static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, + struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd, + struct netlink_ext_ack *extack) +{ + int err; + + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, + extack); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); if (err < 0) return err; @@ -857,7 +974,12 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, if (!nf_flowtable_hw_offload(flowtable)) return 0; - err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack); + if (dev->netdev_ops->ndo_setup_tc) + err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, + &extack); + else + err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, + &extack); if (err < 0) return err; @@ -865,10 +987,75 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, } EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup); +static void nf_flow_table_indr_block_ing_cmd(struct net_device *dev, + struct nf_flowtable *flowtable, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo; + + if (!flowtable) + return; + + nf_flow_table_block_offload_init(&bo, dev_net(dev), cmd, flowtable, + &extack); + + cb(dev, cb_priv, TC_SETUP_FT, &bo); + + nf_flow_table_block_setup(flowtable, &bo, cmd); +} + +static void nf_flow_table_indr_block_cb_cmd(struct nf_flowtable *flowtable, + struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD)) + return; + + nf_flow_table_indr_block_ing_cmd(dev, flowtable, cb, cb_priv, cmd); +} + +static void nf_flow_table_indr_block_cb(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + struct net *net = dev_net(dev); + struct nft_flowtable *nft_ft; + struct nft_table *table; + struct nft_hook *hook; + + mutex_lock(&net->nft.commit_mutex); + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(nft_ft, &table->flowtables, list) { + list_for_each_entry(hook, &nft_ft->hook_list, list) { + if (hook->ops.dev != dev) + continue; + + nf_flow_table_indr_block_cb_cmd(&nft_ft->data, + dev, cb, + cb_priv, cmd); + } + } + } + mutex_unlock(&net->nft.commit_mutex); +} + +static struct flow_indr_block_entry block_ing_entry = { + .cb = nf_flow_table_indr_block_cb, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + int nf_flow_table_offload_init(void) { INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler); + flow_indr_add_block_cb(&block_ing_entry); + return 0; } @@ -877,6 +1064,8 @@ void nf_flow_table_offload_exit(void) struct flow_offload_work *offload, *next; LIST_HEAD(offload_pending_list); + flow_indr_del_block_cb(&block_ing_entry); + cancel_work_sync(&nf_flow_offload_work); list_for_each_entry_safe(offload, next, &offload_pending_list, list) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 38c680f28f15..f92fb6003745 100644 --- a/net/netfilter/nf_tables_api.c +++ 
b/net/netfilter/nf_tables_api.c @@ -2523,8 +2523,8 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx, module_put(type->owner); } -struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, - const struct nlattr *nla) +static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, + const struct nlattr *nla) { struct nft_expr_info info; struct nft_expr *expr; @@ -3266,25 +3266,17 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, /* * Sets */ - -static LIST_HEAD(nf_tables_set_types); - -int nft_register_set(struct nft_set_type *type) -{ - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail_rcu(&type->list, &nf_tables_set_types); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - return 0; -} -EXPORT_SYMBOL_GPL(nft_register_set); - -void nft_unregister_set(struct nft_set_type *type) -{ - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del_rcu(&type->list); - nfnl_unlock(NFNL_SUBSYS_NFTABLES); -} -EXPORT_SYMBOL_GPL(nft_unregister_set); +static const struct nft_set_type *nft_set_types[] = { + &nft_set_hash_fast_type, + &nft_set_hash_type, + &nft_set_rhash_type, + &nft_set_bitmap_type, + &nft_set_rbtree_type, +#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2) + &nft_set_pipapo_avx2_type, +#endif + &nft_set_pipapo_type, +}; #define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \ NFT_SET_TIMEOUT | NFT_SET_OBJECT | \ @@ -3310,15 +3302,11 @@ nft_select_set_ops(const struct nft_ctx *ctx, struct nft_set_estimate est, best; const struct nft_set_type *type; u32 flags = 0; + int i; lockdep_assert_held(&ctx->net->nft.commit_mutex); lockdep_nfnl_nft_mutex_not_held(); -#ifdef CONFIG_MODULES - if (list_empty(&nf_tables_set_types)) { - if (nft_request_module(ctx->net, "nft-set") == -EAGAIN) - return ERR_PTR(-EAGAIN); - } -#endif + if (nla[NFTA_SET_FLAGS] != NULL) flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); @@ -3327,7 +3315,8 @@ nft_select_set_ops(const struct nft_ctx *ctx, best.lookup = ~0; best.space = ~0; - list_for_each_entry(type, &nf_tables_set_types, list) { + for (i = 0; i < ARRAY_SIZE(nft_set_types); i++) { + type = nft_set_types[i]; ops = &type->ops; if (!nft_set_ops_candidate(type, flags)) @@ -3358,11 +3347,6 @@ nft_select_set_ops(const struct nft_ctx *ctx, break; } - if (!try_module_get(type->owner)) - continue; - if (bops != NULL) - module_put(to_set_type(bops)->owner); - bops = ops; best = est; } @@ -4061,10 +4045,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, size = ops->privsize(nla, &desc); set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); - if (!set) { - err = -ENOMEM; - goto err1; - } + if (!set) + return -ENOMEM; name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); if (!name) { @@ -4123,8 +4105,6 @@ err3: kfree(set->name); err2: kvfree(set); -err1: - module_put(to_set_type(ops)->owner); return err; } @@ -4134,7 +4114,6 @@ static void nft_set_destroy(struct nft_set *set) return; set->ops->destroy(set); - module_put(to_set_type(set->ops)->owner); kfree(set->name); kvfree(set); } @@ -4312,7 +4291,6 @@ const struct nft_set_ext_type nft_set_ext_types[] = { .align = __alignof__(u32), }, }; -EXPORT_SYMBOL_GPL(nft_set_ext_types); /* * Set elements @@ -4801,6 +4779,36 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } +struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nlattr *attr) +{ + struct nft_expr *expr; + int err; + + expr = nft_expr_init(ctx, attr); + if (IS_ERR(expr)) + return expr; + + err = -EOPNOTSUPP; + if (!(expr->ops->type->flags & 
NFT_EXPR_STATEFUL)) + goto err_set_elem_expr; + + if (expr->ops->type->flags & NFT_EXPR_GC) { + if (set->flags & NFT_SET_TIMEOUT) + goto err_set_elem_expr; + if (!set->ops->gc_init) + goto err_set_elem_expr; + set->ops->gc_init(set); + } + + return expr; + +err_set_elem_expr: + nft_expr_destroy(ctx, expr); + return ERR_PTR(err); +} + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, @@ -4883,6 +4891,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_object *obj = NULL; + struct nft_expr *expr = NULL; struct nft_userdata *udata; struct nft_data_desc desc; struct nft_data data; @@ -4950,10 +4959,17 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } + if (nla[NFTA_SET_ELEM_EXPR] != NULL) { + expr = nft_set_elem_expr_alloc(ctx, set, + nla[NFTA_SET_ELEM_EXPR]); + if (IS_ERR(expr)) + return PTR_ERR(expr); + } + err = nft_setelem_parse_key(ctx, set, &elem.key.val, nla[NFTA_SET_ELEM_KEY]); if (err < 0) - return err; + goto err_set_elem_expr; nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); @@ -4972,6 +4988,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); } + if (expr) + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPR, + expr->ops->size); + if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { if (!(set->flags & NFT_SET_OBJECT)) { err = -EINVAL; @@ -5056,6 +5076,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, *nft_set_ext_obj(ext) = obj; obj->use++; } + if (expr) { + memcpy(nft_set_ext_expr(ext), expr, expr->ops->size); + kfree(expr); + } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) @@ -5111,6 +5135,9 @@ err_parse_key_end: nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); err_parse_key: nft_data_release(&elem.key.val, NFT_DATA_VALUE); +err_set_elem_expr: + if (expr != NULL) + nft_expr_destroy(ctx, expr); return err; } @@ -5365,7 +5392,6 @@ void nft_set_gc_batch_release(struct rcu_head *rcu) nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); kfree(gcb); } -EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, gfp_t gfp) @@ -5378,7 +5404,6 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, gcb->head.set = set; return gcb; } -EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); /* * Stateful objects diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c deleted file mode 100644 index 586b621007eb..000000000000 --- a/net/netfilter/nf_tables_set_core.c +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/module.h> -#include <net/netfilter/nf_tables_core.h> - -static int __init nf_tables_set_module_init(void) -{ - nft_register_set(&nft_set_hash_fast_type); - nft_register_set(&nft_set_hash_type); - nft_register_set(&nft_set_rhash_type); - nft_register_set(&nft_set_bitmap_type); - nft_register_set(&nft_set_rbtree_type); - nft_register_set(&nft_set_pipapo_type); - - return 0; -} - -static void __exit nf_tables_set_module_exit(void) -{ - nft_unregister_set(&nft_set_pipapo_type); - nft_unregister_set(&nft_set_rbtree_type); - nft_unregister_set(&nft_set_bitmap_type); - nft_unregister_set(&nft_set_rhash_type); - nft_unregister_set(&nft_set_hash_type); - nft_unregister_set(&nft_set_hash_fast_type); -} - 
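The deleted nf_tables_set module above relied on runtime registration (nft_register_set() onto a list) plus module reference counting; its replacement is the fixed const array that nft_select_set_ops() now walks in nf_tables_api.c. A compact userspace model of selecting the best candidate from such a static table (the scoring rule and all names are invented for illustration)::

    #include <stdio.h>

    struct set_type {
            const char *name;
            unsigned int features;     /* supported flag mask */
            unsigned int lookup_cost;  /* lower is better */
    };

    #define F_INTERVAL 0x1
    #define F_MAP      0x2

    static const struct set_type set_types[] = {
            { "hash",   F_MAP,              1 },
            { "rbtree", F_INTERVAL | F_MAP, 3 },
            { "bitmap", 0,                  0 },
    };

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static const struct set_type *select_ops(unsigned int flags)
    {
            const struct set_type *best = NULL;
            size_t i;

            for (i = 0; i < ARRAY_SIZE(set_types); i++) {
                    const struct set_type *t = &set_types[i];

                    /* candidate must support every requested feature */
                    if ((flags & t->features) != flags)
                            continue;
                    if (!best || t->lookup_cost < best->lookup_cost)
                            best = t;
            }
            return best;
    }

    int main(void)
    {
            const struct set_type *t = select_ops(F_INTERVAL);

            printf("selected: %s\n", t ? t->name : "(none)");
            return 0;
    }

Because every backend is now built into nf_tables itself, the try_module_get()/module_put() pairs in the selection loop become unnecessary, which is exactly what the nf_tables_api.c hunk removes.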
-module_init(nf_tables_set_module_init); -module_exit(nf_tables_set_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 2481470dec36..5827117f2635 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -33,7 +33,7 @@ struct nf_acct { refcount_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; - char data[0]; + char data[]; }; struct nfacct_filter { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 0ed2281f03be..bc37d6c59db4 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -93,7 +93,7 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { static int nft_bitwise_init_bool(struct nft_bitwise *priv, const struct nlattr *const tb[]) { - struct nft_data_desc d1, d2; + struct nft_data_desc mask, xor; int err; if (tb[NFTA_BITWISE_DATA]) @@ -103,29 +103,29 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv, !tb[NFTA_BITWISE_XOR]) return -EINVAL; - err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1, + err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->len) { + if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) { err = -EINVAL; goto err1; } - err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, + err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor, tb[NFTA_BITWISE_XOR]); if (err < 0) goto err1; - if (d2.type != NFT_DATA_VALUE || d2.len != priv->len) { + if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) { err = -EINVAL; goto err2; } return 0; err2: - nft_data_release(&priv->xor, d2.type); + nft_data_release(&priv->xor, xor.type); err1: - nft_data_release(&priv->mask, d1.type); + nft_data_release(&priv->mask, mask.type); return err; } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 683785225a3e..46ab28ec4b53 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -81,7 +81,6 @@ void nft_dynset_eval(const struct nft_expr *expr, const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; const struct nft_set_ext *ext; - const struct nft_expr *sexpr; u64 timeout; if (priv->op == NFT_DYNSET_OP_DELETE) { @@ -91,18 +90,13 @@ void nft_dynset_eval(const struct nft_expr *expr, if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, expr, regs, &ext)) { - sexpr = NULL; - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) - sexpr = nft_set_ext_expr(ext); - if (priv->op == NFT_DYNSET_OP_UPDATE && nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { timeout = priv->timeout ? 
: set->timeout; *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout; } - if (sexpr != NULL) - sexpr->ops->eval(sexpr, regs, pkt); + nft_set_elem_update_expr(ext, regs, pkt); if (priv->invert) regs->verdict.code = NFT_BREAK; @@ -206,21 +200,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_EVAL)) return -EINVAL; - priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); + priv->expr = nft_set_elem_expr_alloc(ctx, set, + tb[NFTA_DYNSET_EXPR]); if (IS_ERR(priv->expr)) return PTR_ERR(priv->expr); - - err = -EOPNOTSUPP; - if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err1; - - if (priv->expr->ops->type->flags & NFT_EXPR_GC) { - if (set->flags & NFT_SET_TIMEOUT) - goto err1; - if (!set->ops->gc_init) - goto err1; - set->ops->gc_init(set); - } } nft_set_ext_prepare(&priv->tmpl); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 660bad688e2b..1e70359d633c 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -43,6 +43,7 @@ void nft_lookup_eval(const struct nft_expr *expr, nft_data_copy(®s->data[priv->dreg], nft_set_ext_data(ext), set->dlen); + nft_set_elem_update_expr(ext, regs, pkt); } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 87e8d9ba0c9b..1cb2e67e6e03 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -293,8 +293,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, return true; } -struct nft_set_type nft_set_bitmap_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_bitmap_type = { .ops = { .privsize = nft_bitmap_privsize, .elemsize = offsetof(struct nft_bitmap_elem, ext), diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index d350a7cd3af0..4d3f147e8d8d 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -662,8 +662,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features return true; } -struct nft_set_type nft_set_rhash_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_rhash_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT | NFT_SET_EVAL, .ops = { @@ -686,8 +685,7 @@ struct nft_set_type nft_set_rhash_type __read_mostly = { }, }; -struct nft_set_type nft_set_hash_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_hash_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { .privsize = nft_hash_privsize, @@ -706,8 +704,7 @@ struct nft_set_type nft_set_hash_type __read_mostly = { }, }; -struct nft_set_type nft_set_hash_fast_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_hash_fast_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { .privsize = nft_hash_privsize, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4fc0c924ed5d..c1afb6c94edc 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -330,144 +330,22 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/log2.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <uapi/linux/netfilter/nf_tables.h> -#include <net/ipv6.h> /* For the maximum length of a field */ #include <linux/bitmap.h> #include <linux/bitops.h> -/* Count of 
concatenated fields depends on count of 32-bit nftables registers */ -#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT - -/* Largest supported field size */ -#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) -#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) - -/* Number of bits to be grouped together in lookup table buckets, arbitrary */ -#define NFT_PIPAPO_GROUP_BITS 4 -#define NFT_PIPAPO_GROUPS_PER_BYTE (BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS) - -/* Fields are padded to 32 bits in input registers */ -#define NFT_PIPAPO_GROUPS_PADDED_SIZE(x) \ - (round_up((x) / NFT_PIPAPO_GROUPS_PER_BYTE, sizeof(u32))) -#define NFT_PIPAPO_GROUPS_PADDING(x) \ - (NFT_PIPAPO_GROUPS_PADDED_SIZE((x)) - (x) / NFT_PIPAPO_GROUPS_PER_BYTE) - -/* Number of buckets, given by 2 ^ n, with n grouped bits */ -#define NFT_PIPAPO_BUCKETS (1 << NFT_PIPAPO_GROUP_BITS) - -/* Each n-bit range maps to up to n * 2 rules */ -#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) - -/* Use the rest of mapping table buckets for rule indices, but it makes no sense - * to exceed 32 bits - */ -#if BITS_PER_LONG == 64 -#define NFT_PIPAPO_MAP_TOBITS 32 -#else -#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) -#endif - -/* ...which gives us the highest allowed index for a rule */ -#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ - - (1UL << NFT_PIPAPO_MAP_NBITS)) - -#define nft_pipapo_for_each_field(field, index, match) \ - for ((field) = (match)->f, (index) = 0; \ - (index) < (match)->field_count; \ - (index)++, (field)++) - -/** - * union nft_pipapo_map_bucket - Bucket of mapping table - * @to: First rule number (in next field) this rule maps to - * @n: Number of rules (in next field) this rule maps to - * @e: If there's no next field, pointer to element this rule maps to - */ -union nft_pipapo_map_bucket { - struct { -#if BITS_PER_LONG == 64 - static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); - u32 to; - - static_assert(NFT_PIPAPO_MAP_NBITS <= 32); - u32 n; -#else - unsigned long to:NFT_PIPAPO_MAP_TOBITS; - unsigned long n:NFT_PIPAPO_MAP_NBITS; -#endif - }; - struct nft_pipapo_elem *e; -}; - -/** - * struct nft_pipapo_field - Lookup, mapping tables and related data for a field - * @groups: Amount of 4-bit groups - * @rules: Number of inserted rules - * @bsize: Size of each bucket in lookup table, in longs - * @lt: Lookup table: 'groups' rows of NFT_PIPAPO_BUCKETS buckets - * @mt: Mapping table: one bucket per rule - */ -struct nft_pipapo_field { - int groups; - unsigned long rules; - size_t bsize; - unsigned long *lt; - union nft_pipapo_map_bucket *mt; -}; - -/** - * struct nft_pipapo_match - Data used for lookup and matching - * @field_count Amount of fields in set - * @scratch: Preallocated per-CPU maps for partial matching results - * @bsize_max: Maximum lookup table bucket size of all fields, in longs - * @rcu Matching data is swapped on commits - * @f: Fields, with lookup and mapping tables - */ -struct nft_pipapo_match { - int field_count; - unsigned long * __percpu *scratch; - size_t bsize_max; - struct rcu_head rcu; - struct nft_pipapo_field f[0]; -}; +#include "nft_set_pipapo_avx2.h" +#include "nft_set_pipapo.h" /* Current working bitmap index, toggled between field matches */ static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); /** - * struct nft_pipapo - Representation of a set - * @match: Currently in-use matching data - * @clone: Copy where pending insertions and deletions are kept - * @groups: Total amount of 4-bit groups for fields in this set - * 
@width: Total bytes to be matched for one packet, including padding - * @dirty: Working copy has pending insertions or deletions - * @last_gc: Timestamp of last garbage collection run, jiffies - */ -struct nft_pipapo { - struct nft_pipapo_match __rcu *match; - struct nft_pipapo_match *clone; - int groups; - int width; - bool dirty; - unsigned long last_gc; -}; - -struct nft_pipapo_elem; - -/** - * struct nft_pipapo_elem - API-facing representation of single set element - * @ext: nftables API extensions - */ -struct nft_pipapo_elem { - struct nft_set_ext ext; -}; - -/** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits * @len: Length of bitmap in longs @@ -484,9 +362,8 @@ struct nft_pipapo_elem { * * Return: -1 on no match, bit position on 'match_only', 0 otherwise. */ -static int pipapo_refill(unsigned long *map, int len, int rules, - unsigned long *dst, union nft_pipapo_map_bucket *mt, - bool match_only) +int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool match_only) { unsigned long bitset; int k, ret = -1; @@ -559,26 +436,18 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; - unsigned long *lt = f->lt; - int b, group; + int b; - /* For each 4-bit group: select lookup table bucket depending on + /* For each bit group: select lookup table bucket depending on * packet bytes value, then AND bucket value */ - for (group = 0; group < f->groups; group += 2) { - u8 v; - - v = *rp >> 4; - __bitmap_and(res_map, res_map, lt + v * f->bsize, - f->bsize * BITS_PER_LONG); - lt += f->bsize * NFT_PIPAPO_BUCKETS; - - v = *rp & 0x0f; - rp++; - __bitmap_and(res_map, res_map, lt + v * f->bsize, - f->bsize * BITS_PER_LONG); - lt += f->bsize * NFT_PIPAPO_BUCKETS; - } + if (likely(f->bb == 8)) + pipapo_and_field_buckets_8bit(f, res_map, rp); + else + pipapo_and_field_buckets_4bit(f, res_map, rp); + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + + rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); /* Now populate the bitmap for the next field, unless this is * the last field, in which case return the matched 'ext' @@ -621,7 +490,7 @@ next_match: map_index = !map_index; swap(res_map, fill_map); - rp += NFT_PIPAPO_GROUPS_PADDING(f->groups); + rp += NFT_PIPAPO_GROUPS_PADDING(f); } out: @@ -669,26 +538,19 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; - unsigned long *lt = f->lt; - int b, group; + int b; - /* For each 4-bit group: select lookup table bucket depending on + /* For each bit group: select lookup table bucket depending on * packet bytes value, then AND bucket value */ - for (group = 0; group < f->groups; group++) { - u8 v; - - if (group % 2) { - v = *data & 0x0f; - data++; - } else { - v = *data >> 4; - } - __bitmap_and(res_map, res_map, lt + v * f->bsize, - f->bsize * BITS_PER_LONG); + if (f->bb == 8) + pipapo_and_field_buckets_8bit(f, res_map, data); + else if (f->bb == 4) + pipapo_and_field_buckets_4bit(f, res_map, data); + else + BUG(); - lt += f->bsize * NFT_PIPAPO_BUCKETS; - } + data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); /* Now populate the bitmap for the next field, unless this is * the last field, in which case return the matched 'ext' @@ -713,7 +575,7 @@ next_match: goto out; } - data += NFT_PIPAPO_GROUPS_PADDING(f->groups); + data += NFT_PIPAPO_GROUPS_PADDING(f); 
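In both lookup paths the open-coded nibble loop gives way to pipapo_and_field_buckets_4bit()/_8bit(), chosen by the field's group width f->bb. A toy standalone version of the bucket-select-and-AND step at both widths (bucket bitmaps reduced here to a single 64-bit word; the real buckets span f->bsize longs)::

    #include <stdio.h>
    #include <stdint.h>

    /* One lookup-table row per group; each bucket holds one word of
     * per-rule match bits in this toy example.
     */
    static uint64_t and_groups_4bit(const uint64_t lt[][16], uint8_t byte)
    {
            uint64_t res = ~0ULL;

            res &= lt[0][byte >> 4];    /* high nibble: first group */
            res &= lt[1][byte & 0x0f];  /* low nibble: second group */
            return res;
    }

    static uint64_t and_group_8bit(const uint64_t lt[][256], uint8_t byte)
    {
            return lt[0][byte];         /* one 256-bucket group per byte */
    }

    int main(void)
    {
            static uint64_t lt4[2][16], lt8[1][256];
            uint8_t byte = 0xab;

            lt4[0][0xa] = 0x3;          /* rules 0 and 1 match high nibble */
            lt4[1][0xb] = 0x1;          /* only rule 0 matches low nibble */
            lt8[0][0xab] = 0x1;         /* rule 0 matches the whole byte */

            printf("4-bit: %llx, 8-bit: %llx\n",
                   (unsigned long long)and_groups_4bit(lt4, byte),
                   (unsigned long long)and_group_8bit(lt8, byte));
            return 0;
    }

With 4-bit groups one packet byte indexes two 16-bucket groups (high nibble first, as in the removed code), while with 8-bit groups it indexes a single 256-bucket group; a correctly converted pair of tables yields the same surviving rule set either way, which is the equivalence the pipapo_lt_4b_to_8b()/pipapo_lt_8b_to_4b() helpers below preserve.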
/* Swap bitmap indices: fill_map will be the initial bitmap for * the next field (i.e. the new res_map), and res_map is @@ -736,8 +598,8 @@ out: * @elem: nftables API element representation containing key data * @flags: Unused */ -void *nft_pipapo_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { return pipapo_get(net, set, (const u8 *)elem->key.val.data, nft_genmask_cur(net)); @@ -763,6 +625,10 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules) int group, bucket; new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG); +#ifdef NFT_PIPAPO_ALIGN + new_bucket_size = roundup(new_bucket_size, + NFT_PIPAPO_ALIGN / sizeof(*new_lt)); +#endif if (new_bucket_size == f->bsize) goto mt; @@ -772,15 +638,18 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules) else copy = new_bucket_size; - new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS * new_bucket_size * - sizeof(*new_lt), GFP_KERNEL); + new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) * + new_bucket_size * sizeof(*new_lt) + + NFT_PIPAPO_ALIGN_HEADROOM, + GFP_KERNEL); if (!new_lt) return -ENOMEM; - new_p = new_lt; - old_p = old_lt; + new_p = NFT_PIPAPO_LT_ALIGN(new_lt); + old_p = NFT_PIPAPO_LT_ALIGN(old_lt); + for (group = 0; group < f->groups; group++) { - for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS; bucket++) { + for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS(f->bb); bucket++) { memcpy(new_p, old_p, copy * sizeof(*new_p)); new_p += copy; old_p += copy; @@ -807,7 +676,7 @@ mt: if (new_lt) { f->bsize = new_bucket_size; - f->lt = new_lt; + NFT_PIPAPO_LT_ASSIGN(f, new_lt); kvfree(old_lt); } @@ -829,13 +698,196 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group, { unsigned long *pos; - pos = f->lt + f->bsize * NFT_PIPAPO_BUCKETS * group; + pos = NFT_PIPAPO_LT_ALIGN(f->lt); + pos += f->bsize * NFT_PIPAPO_BUCKETS(f->bb) * group; pos += f->bsize * v; __set_bit(rule, pos); } /** + * pipapo_lt_4b_to_8b() - Switch lookup table group width from 4 bits to 8 bits + * @old_groups: Number of current groups + * @bsize: Size of one bucket, in longs + * @old_lt: Pointer to the current lookup table + * @new_lt: Pointer to the new, pre-allocated lookup table + * + * Each bucket with index b in the new lookup table, belonging to group g, is + * filled with the bit intersection between: + * - bucket with index given by the upper 4 bits of b, from group g, and + * - bucket with index given by the lower 4 bits of b, from group g + 1 + * + * That is, given buckets from the new lookup table N(x, y) and the old lookup + * table O(x, y), with x bucket index, and y group index: + * + * N(b, g) := O(b / 16, g) & O(b % 16, g + 1) + * + * This ensures equivalence of the matching results on lookup. Two examples in + * pictures: + * + * bucket + * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ... 254 255 + * 0 ^ + * 1 | ^ + * ... ( & ) | + * / \ | + * / \ .-( & )-. + * / bucket \ | | + * group 0 / 1 2 3 \ 4 5 6 7 8 9 10 11 12 13 |14 15 | + * 0 / \ | | + * 1 \ | | + * 2 | --' + * 3 '- + * ... 
+ */ +static void pipapo_lt_4b_to_8b(int old_groups, int bsize, + unsigned long *old_lt, unsigned long *new_lt) +{ + int g, b, i; + + for (g = 0; g < old_groups / 2; g++) { + int src_g0 = g * 2, src_g1 = g * 2 + 1; + + for (b = 0; b < NFT_PIPAPO_BUCKETS(8); b++) { + int src_b0 = b / NFT_PIPAPO_BUCKETS(4); + int src_b1 = b % NFT_PIPAPO_BUCKETS(4); + int src_i0 = src_g0 * NFT_PIPAPO_BUCKETS(4) + src_b0; + int src_i1 = src_g1 * NFT_PIPAPO_BUCKETS(4) + src_b1; + + for (i = 0; i < bsize; i++) { + *new_lt = old_lt[src_i0 * bsize + i] & + old_lt[src_i1 * bsize + i]; + new_lt++; + } + } + } +} + +/** + * pipapo_lt_8b_to_4b() - Switch lookup table group width from 8 bits to 4 bits + * @old_groups: Number of current groups + * @bsize: Size of one bucket, in longs + * @old_lt: Pointer to the current lookup table + * @new_lt: Pointer to the new, pre-allocated lookup table + * + * Each bucket with index b in the new lookup table, belonging to group g, is + * filled with the bit union of: + * - all the buckets with index such that the upper four bits of the lower byte + * equal b, from group g, with g odd + * - all the buckets with index such that the lower four bits equal b, from + * group g, with g even + * + * That is, given buckets from the new lookup table N(x, y) and the old lookup + * table O(x, y), with x bucket index, and y group index: + * + * - with g odd: N(b, g) := U(O(x, g) for each x : x = (b & 0xf0) >> 4) + * - with g even: N(b, g) := U(O(x, g) for each x : x = b & 0x0f) + * + * where U() denotes the arbitrary union operation (binary OR of n terms). This + * ensures equivalence of the matching results on lookup. + */ +static void pipapo_lt_8b_to_4b(int old_groups, int bsize, + unsigned long *old_lt, unsigned long *new_lt) +{ + int g, b, bsrc, i; + + memset(new_lt, 0, old_groups * 2 * NFT_PIPAPO_BUCKETS(4) * bsize * + sizeof(unsigned long)); + + for (g = 0; g < old_groups * 2; g += 2) { + int src_g = g / 2; + + for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) { + for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g; + bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1); + bsrc++) { + if (((bsrc & 0xf0) >> 4) != b) + continue; + + for (i = 0; i < bsize; i++) + new_lt[i] |= old_lt[bsrc * bsize + i]; + } + + new_lt += bsize; + } + + for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) { + for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g; + bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1); + bsrc++) { + if ((bsrc & 0x0f) != b) + continue; + + for (i = 0; i < bsize; i++) + new_lt[i] |= old_lt[bsrc * bsize + i]; + } + + new_lt += bsize; + } + } +} + +/** + * pipapo_lt_bits_adjust() - Adjust group size for lookup table if needed + * @f: Field containing lookup table + */ +static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) +{ + unsigned long *new_lt; + int groups, bb; + size_t lt_size; + + lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * + sizeof(*f->lt); + + if (f->bb == NFT_PIPAPO_GROUP_BITS_SMALL_SET && + lt_size > NFT_PIPAPO_LT_SIZE_HIGH) { + groups = f->groups * 2; + bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET; + + lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * + sizeof(*f->lt); + } else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET && + lt_size < NFT_PIPAPO_LT_SIZE_LOW) { + groups = f->groups / 2; + bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET; + + lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * + sizeof(*f->lt); + + /* Don't increase group width if the resulting lookup table size + * would exceed the upper size threshold for a "small" set. 
+ */ + if (lt_size > NFT_PIPAPO_LT_SIZE_HIGH) + return; + } else { + return; + } + + new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL); + if (!new_lt) + return; + + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + if (f->bb == 4 && bb == 8) { + pipapo_lt_4b_to_8b(f->groups, f->bsize, + NFT_PIPAPO_LT_ALIGN(f->lt), + NFT_PIPAPO_LT_ALIGN(new_lt)); + } else if (f->bb == 8 && bb == 4) { + pipapo_lt_8b_to_4b(f->groups, f->bsize, + NFT_PIPAPO_LT_ALIGN(f->lt), + NFT_PIPAPO_LT_ALIGN(new_lt)); + } else { + BUG(); + } + + f->groups = groups; + f->bb = bb; + kvfree(f->lt); + NFT_PIPAPO_LT_ASSIGN(f, new_lt); +} + +/** * pipapo_insert() - Insert new rule in field given input key and mask length * @f: Field containing lookup table * @k: Input key for classification, without nftables padding @@ -849,7 +901,7 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group, static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules++, group, ret; + int rule = f->rules++, group, ret, bit_offset = 0; ret = pipapo_resize(f, f->rules - 1, f->rules); if (ret) @@ -859,28 +911,33 @@ static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int i, v; u8 mask; - if (group % 2) - v = k[group / 2] & 0x0f; - else - v = k[group / 2] >> 4; + v = k[group / (BITS_PER_BYTE / f->bb)]; + v &= GENMASK(BITS_PER_BYTE - bit_offset - 1, 0); + v >>= (BITS_PER_BYTE - bit_offset) - f->bb; + + bit_offset += f->bb; + bit_offset %= BITS_PER_BYTE; - if (mask_bits >= (group + 1) * 4) { + if (mask_bits >= (group + 1) * f->bb) { /* Not masked */ pipapo_bucket_set(f, rule, group, v); - } else if (mask_bits <= group * 4) { + } else if (mask_bits <= group * f->bb) { /* Completely masked */ - for (i = 0; i < NFT_PIPAPO_BUCKETS; i++) + for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++) pipapo_bucket_set(f, rule, group, i); } else { /* The mask limit falls on this group */ - mask = 0x0f >> (mask_bits - group * 4); - for (i = 0; i < NFT_PIPAPO_BUCKETS; i++) { + mask = GENMASK(f->bb - 1, 0); + mask >>= mask_bits - group * f->bb; + for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++) { if ((i & ~mask) == (v & ~mask)) pipapo_bucket_set(f, rule, group, i); } } } + pipapo_lt_bits_adjust(f); + return 1; } @@ -1053,8 +1110,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, for_each_possible_cpu(i) { unsigned long *scratch; +#ifdef NFT_PIPAPO_ALIGN + unsigned long *scratch_aligned; +#endif - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2, + scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { /* On failure, there's no need to undo previous @@ -1070,6 +1131,11 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, kfree(*per_cpu_ptr(clone->scratch, i)); *per_cpu_ptr(clone->scratch, i) = scratch; + +#ifdef NFT_PIPAPO_ALIGN + scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); + *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; +#endif } return 0; @@ -1123,11 +1189,11 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, return -ENOSPC; if (memcmp(start_p, end_p, - f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) > 0) + f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) > 0) return -EINVAL; - start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); - end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); + start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } /* Insert */ @@ -1141,22 +1207,19 @@ static int 
nft_pipapo_insert(const struct net *net, const struct nft_set *set, rulemap[i].to = f->rules; ret = memcmp(start, end, - f->groups / NFT_PIPAPO_GROUPS_PER_BYTE); - if (!ret) { - ret = pipapo_insert(f, start, - f->groups * NFT_PIPAPO_GROUP_BITS); - } else { - ret = pipapo_expand(f, start, end, - f->groups * NFT_PIPAPO_GROUP_BITS); - } + f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)); + if (!ret) + ret = pipapo_insert(f, start, f->groups * f->bb); + else + ret = pipapo_expand(f, start, end, f->groups * f->bb); if (f->bsize > bsize_max) bsize_max = f->bsize; rulemap[i].n = ret; - start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); - end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); + start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } if (!*this_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) { @@ -1200,23 +1263,35 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; +#ifdef NFT_PIPAPO_ALIGN + new->scratch_aligned = alloc_percpu(*new->scratch_aligned); + if (!new->scratch_aligned) + goto out_scratch; +#endif + rcu_head_init(&new->rcu); src = old->f; dst = new->f; for (i = 0; i < old->field_count; i++) { + unsigned long *new_lt; + memcpy(dst, src, offsetof(struct nft_pipapo_field, lt)); - dst->lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS * - src->bsize * sizeof(*dst->lt), - GFP_KERNEL); - if (!dst->lt) + new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) * + src->bsize * sizeof(*dst->lt) + + NFT_PIPAPO_ALIGN_HEADROOM, + GFP_KERNEL); + if (!new_lt) goto out_lt; - memcpy(dst->lt, src->lt, + NFT_PIPAPO_LT_ASSIGN(dst, new_lt); + + memcpy(NFT_PIPAPO_LT_ALIGN(new_lt), + NFT_PIPAPO_LT_ALIGN(src->lt), src->bsize * sizeof(*dst->lt) * - src->groups * NFT_PIPAPO_BUCKETS); + src->groups * NFT_PIPAPO_BUCKETS(src->bb)); dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL); if (!dst->mt) @@ -1237,8 +1312,11 @@ out_lt: kvfree(dst->lt); dst--; } - free_percpu(new->scratch); +#ifdef NFT_PIPAPO_ALIGN + free_percpu(new->scratch_aligned); +#endif out_scratch: + free_percpu(new->scratch); kfree(new); return ERR_PTR(-ENOMEM); @@ -1394,9 +1472,10 @@ static void pipapo_drop(struct nft_pipapo_match *m, unsigned long *pos; int b; - pos = f->lt + g * NFT_PIPAPO_BUCKETS * f->bsize; + pos = NFT_PIPAPO_LT_ALIGN(f->lt) + g * + NFT_PIPAPO_BUCKETS(f->bb) * f->bsize; - for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) { + for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) { bitmap_cut(pos, pos, rulemap[i].to, rulemap[i].n, f->bsize * BITS_PER_LONG); @@ -1414,6 +1493,8 @@ static void pipapo_drop(struct nft_pipapo_match *m, ; } f->rules -= rulemap[i].n; + + pipapo_lt_bits_adjust(f); } } @@ -1498,6 +1579,9 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); +#ifdef NFT_PIPAPO_ALIGN + free_percpu(m->scratch_aligned); +#endif free_percpu(m->scratch); pipapo_free_fields(m); @@ -1690,30 +1774,33 @@ static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, static int pipapo_get_boundaries(struct nft_pipapo_field *f, int first_rule, int rule_count, u8 *left, u8 *right) { + int g, mask_len = 0, bit_offset = 0; u8 *l = left, *r = right; - int g, mask_len = 0; for (g = 0; g < f->groups; g++) { int b, x0, x1; x0 = -1; x1 = -1; - for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) { + for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) { unsigned long *pos; - pos = f->lt + (g * NFT_PIPAPO_BUCKETS + b) * f->bsize; + pos = NFT_PIPAPO_LT_ALIGN(f->lt) + + (g * 
NFT_PIPAPO_BUCKETS(f->bb) + b) * f->bsize; if (test_bit(first_rule, pos) && x0 == -1) x0 = b; if (test_bit(first_rule + rule_count - 1, pos)) x1 = b; } - if (g % 2) { - *(l++) |= x0 & 0x0f; - *(r++) |= x1 & 0x0f; - } else { - *l |= x0 << 4; - *r |= x1 << 4; + *l |= x0 << (BITS_PER_BYTE - f->bb - bit_offset); + *r |= x1 << (BITS_PER_BYTE - f->bb - bit_offset); + + bit_offset += f->bb; + if (bit_offset >= BITS_PER_BYTE) { + bit_offset %= BITS_PER_BYTE; + l++; + r++; } if (x1 - x0 == 0) @@ -1748,8 +1835,9 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, pipapo_get_boundaries(f, first_rule, rule_count, left, right); - return !memcmp(start, left, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) && - !memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE); + return !memcmp(start, left, + f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) && + !memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)); } /** @@ -1801,8 +1889,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, rules_fx = f->mt[start].n; start = f->mt[start].to; - match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); - match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups); + match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } if (i == m->field_count) { @@ -1885,56 +1973,24 @@ static u64 nft_pipapo_privsize(const struct nlattr * const nla[], } /** - * nft_pipapo_estimate() - Estimate set size, space and lookup complexity - * @desc: Set description, element count and field description used here + * nft_pipapo_estimate() - Set size, space and lookup complexity + * @desc: Set description, element count and field description used * @features: Flags: NFT_SET_INTERVAL needs to be there * @est: Storage for estimation data * - * The size for this set type can vary dramatically, as it depends on the number - * of rules (composing netmasks) the entries expand to. We compute the worst - * case here. - * - * In general, for a non-ranged entry or a single composing netmask, we need - * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that - * is, each input bit needs four bits of matching data), plus a bucket in the - * mapping table for each field. - * - * Return: true only for compatible range concatenations + * Return: true if set description is compatible, false otherwise */ static bool nft_pipapo_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { - unsigned long entry_size; - int i; - - if (!(features & NFT_SET_INTERVAL) || desc->field_count <= 1) + if (!(features & NFT_SET_INTERVAL) || + desc->field_count < NFT_PIPAPO_MIN_FIELDS) return false; - for (i = 0, entry_size = 0; i < desc->field_count; i++) { - unsigned long rules; - - if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES) - return false; - - /* Worst-case ranges for each concatenated field: each n-bit - * field can expand to up to n * 2 rules in each bucket, and - * each rule also needs a mapping bucket. 
- */ - rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2; - entry_size += rules * NFT_PIPAPO_BUCKETS / BITS_PER_BYTE; - entry_size += rules * sizeof(union nft_pipapo_map_bucket); - } - - /* Rules in lookup and mapping tables are needed for each entry */ - est->size = desc->size * entry_size; - if (est->size && div_u64(est->size, desc->size) != entry_size) + est->size = pipapo_estimate_size(desc); + if (!est->size) return false; - est->size += sizeof(struct nft_pipapo) + - sizeof(struct nft_pipapo_match) * 2; - - est->size += sizeof(struct nft_pipapo_field) * desc->field_count; - est->lookup = NFT_SET_CLASS_O_LOG_N; est->space = NFT_SET_CLASS_O_N; @@ -1961,38 +2017,52 @@ static int nft_pipapo_init(const struct nft_set *set, struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; struct nft_pipapo_field *f; - int err, i; + int err, i, field_count; + + field_count = desc->field_count ? : 1; - if (desc->field_count > NFT_PIPAPO_MAX_FIELDS) + if (field_count > NFT_PIPAPO_MAX_FIELDS) return -EINVAL; - m = kmalloc(sizeof(*priv->match) + sizeof(*f) * desc->field_count, + m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count, GFP_KERNEL); if (!m) return -ENOMEM; - m->field_count = desc->field_count; + m->field_count = field_count; m->bsize_max = 0; m->scratch = alloc_percpu(unsigned long *); if (!m->scratch) { err = -ENOMEM; - goto out_free; + goto out_scratch; } for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; +#ifdef NFT_PIPAPO_ALIGN + m->scratch_aligned = alloc_percpu(unsigned long *); + if (!m->scratch_aligned) { + err = -ENOMEM; + goto out_free; + } + for_each_possible_cpu(i) + *per_cpu_ptr(m->scratch_aligned, i) = NULL; +#endif + rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { - f->groups = desc->field_len[i] * NFT_PIPAPO_GROUPS_PER_BYTE; - priv->groups += f->groups; + int len = desc->field_len[i] ? 
: set->klen; - priv->width += round_up(desc->field_len[i], sizeof(u32)); + f->bb = NFT_PIPAPO_GROUP_BITS_INIT; + f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f); + + priv->width += round_up(len, sizeof(u32)); f->bsize = 0; f->rules = 0; - f->lt = NULL; + NFT_PIPAPO_LT_ASSIGN(f, NULL); f->mt = NULL; } @@ -2010,7 +2080,11 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: +#ifdef NFT_PIPAPO_ALIGN + free_percpu(m->scratch_aligned); +#endif free_percpu(m->scratch); +out_scratch: kfree(m); return err; @@ -2045,16 +2119,21 @@ static void nft_pipapo_destroy(const struct nft_set *set) nft_set_elem_destroy(set, e, true); } +#ifdef NFT_PIPAPO_ALIGN + free_percpu(m->scratch_aligned); +#endif for_each_possible_cpu(cpu) kfree(*per_cpu_ptr(m->scratch, cpu)); free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); priv->match = NULL; } if (priv->clone) { +#ifdef NFT_PIPAPO_ALIGN + free_percpu(priv->clone->scratch_aligned); +#endif for_each_possible_cpu(cpu) kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); free_percpu(priv->clone->scratch); @@ -2081,8 +2160,7 @@ static void nft_pipapo_gc_init(const struct nft_set *set) priv->last_gc = jiffies; } -struct nft_set_type nft_set_pipapo_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_pipapo_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { @@ -2102,3 +2180,26 @@ struct nft_set_type nft_set_pipapo_type __read_mostly = { .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; + +#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2) +const struct nft_set_type nft_set_pipapo_avx2_type = { + .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | + NFT_SET_TIMEOUT, + .ops = { + .lookup = nft_pipapo_avx2_lookup, + .insert = nft_pipapo_insert, + .activate = nft_pipapo_activate, + .deactivate = nft_pipapo_deactivate, + .flush = nft_pipapo_flush, + .remove = nft_pipapo_remove, + .walk = nft_pipapo_walk, + .get = nft_pipapo_get, + .privsize = nft_pipapo_privsize, + .estimate = nft_pipapo_avx2_estimate, + .init = nft_pipapo_init, + .destroy = nft_pipapo_destroy, + .gc_init = nft_pipapo_gc_init, + .elemsize = offsetof(struct nft_pipapo_elem, ext), + }, +}; +#endif diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h new file mode 100644 index 000000000000..25a75591583e --- /dev/null +++ b/net/netfilter/nft_set_pipapo.h @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#ifndef _NFT_SET_PIPAPO_H + +#include <linux/log2.h> +#include <net/ipv6.h> /* For the maximum length of a field */ + +/* Count of concatenated fields depends on count of 32-bit nftables registers */ +#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT + +/* Restrict usage to multiple fields, make sure rbtree is used otherwise */ +#define NFT_PIPAPO_MIN_FIELDS 2 + +/* Largest supported field size */ +#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) +#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) + +/* Bits to be grouped together in table buckets depending on set size */ +#define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET +#define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8 +#define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4 +#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \ + BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \ + (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4)) +#define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb) + +/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the + * small group width, and switch 
to the big group width if the table gets + * smaller than NFT_PIPAPO_LT_SIZE_LOW. + * + * Picking 2MiB as threshold (for a single table) avoids as much as possible + * crossing page boundaries on most architectures (x86-64 and MIPS huge pages, + * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which + * keeps performance nice in case kvmalloc() gives us non-contiguous areas. + */ +#define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21) +#define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16) +#define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD +#define NFT_PIPAPO_LT_SIZE_LOW NFT_PIPAPO_LT_SIZE_THRESHOLD - \ + NFT_PIPAPO_LT_SIZE_HYSTERESIS + +/* Fields are padded to 32 bits in input registers */ +#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \ + (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32))) +#define NFT_PIPAPO_GROUPS_PADDING(f) \ + (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \ + NFT_PIPAPO_GROUPS_PER_BYTE(f)) + +/* Number of buckets given by 2 ^ n, with n bucket bits */ +#define NFT_PIPAPO_BUCKETS(bb) (1 << (bb)) + +/* Each n-bit range maps to up to n * 2 rules */ +#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) + +/* Use the rest of mapping table buckets for rule indices, but it makes no sense + * to exceed 32 bits + */ +#if BITS_PER_LONG == 64 +#define NFT_PIPAPO_MAP_TOBITS 32 +#else +#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) +#endif + +/* ...which gives us the highest allowed index for a rule */ +#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ + - (1UL << NFT_PIPAPO_MAP_NBITS)) + +/* Definitions for vectorised implementations */ +#ifdef NFT_PIPAPO_ALIGN +#define NFT_PIPAPO_ALIGN_HEADROOM \ + (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) +#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) +#define NFT_PIPAPO_LT_ASSIGN(field, x) \ + do { \ + (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \ + (field)->lt = (x); \ + } while (0) +#else +#define NFT_PIPAPO_ALIGN_HEADROOM 0 +#define NFT_PIPAPO_LT_ALIGN(lt) (lt) +#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x)) +#endif /* NFT_PIPAPO_ALIGN */ + +#define nft_pipapo_for_each_field(field, index, match) \ + for ((field) = (match)->f, (index) = 0; \ + (index) < (match)->field_count; \ + (index)++, (field)++) + +/** + * union nft_pipapo_map_bucket - Bucket of mapping table + * @to: First rule number (in next field) this rule maps to + * @n: Number of rules (in next field) this rule maps to + * @e: If there's no next field, pointer to element this rule maps to + */ +union nft_pipapo_map_bucket { + struct { +#if BITS_PER_LONG == 64 + static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); + u32 to; + + static_assert(NFT_PIPAPO_MAP_NBITS <= 32); + u32 n; +#else + unsigned long to:NFT_PIPAPO_MAP_TOBITS; + unsigned long n:NFT_PIPAPO_MAP_NBITS; +#endif + }; + struct nft_pipapo_elem *e; +}; + +/** + * struct nft_pipapo_field - Lookup, mapping tables and related data for a field + * @groups: Amount of bit groups + * @rules: Number of inserted rules + * @bsize: Size of each bucket in lookup table, in longs + * @bb: Number of bits grouped together in lookup table buckets + * @lt: Lookup table: 'groups' rows of buckets + * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes + * @mt: Mapping table: one bucket per rule + */ +struct nft_pipapo_field { + int groups; + unsigned long rules; + size_t bsize; + int bb; +#ifdef NFT_PIPAPO_ALIGN + unsigned long *lt_aligned; +#endif + unsigned long *lt; + union nft_pipapo_map_bucket *mt; +}; + 
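To make the 2MiB threshold above concrete: one field's lookup table occupies a row of NFT_PIPAPO_BUCKETS(bb) buckets per group, each bucket f->bsize longs wide. A hedged restatement of that footprint (the helper is illustrative; the kernel computes this inline when deciding whether to flip group width):

    /* Lookup table footprint of one field, in bytes */
    static size_t pipapo_lt_bytes(int groups, int bb, size_t bsize)
    {
            return (size_t)groups * (1UL << bb) * bsize * sizeof(unsigned long);
    }

For an IPv6 field with 8-bit groups (16 rows of 256 buckets), the table crosses NFT_PIPAPO_LT_SIZE_HIGH once bsize reaches 64 longs: 16 * 256 * 64 * 8 = 2MiB, i.e. at roughly four thousand rules. Switching to 4-bit groups (32 rows of 16 buckets) then shrinks the table eightfold for the same bsize, at the price of twice as many intersections per lookup; the 64KiB hysteresis gap keeps a set sitting near the boundary from flapping between the two layouts on every insertion or deletion.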
+/** + * struct nft_pipapo_match - Data used for lookup and matching + * @field_count: Amount of fields in set + * @scratch: Preallocated per-CPU maps for partial matching results + * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes + * @bsize_max: Maximum lookup table bucket size of all fields, in longs + * @rcu: Matching data is swapped on commits + * @f: Fields, with lookup and mapping tables + */ +struct nft_pipapo_match { + int field_count; +#ifdef NFT_PIPAPO_ALIGN + unsigned long * __percpu *scratch_aligned; +#endif + unsigned long * __percpu *scratch; + size_t bsize_max; + struct rcu_head rcu; + struct nft_pipapo_field f[]; +}; + +/** + * struct nft_pipapo - Representation of a set + * @match: Currently in-use matching data + * @clone: Copy where pending insertions and deletions are kept + * @width: Total bytes to be matched for one packet, including padding + * @dirty: Working copy has pending insertions or deletions + * @last_gc: Timestamp of last garbage collection run, jiffies + */ +struct nft_pipapo { + struct nft_pipapo_match __rcu *match; + struct nft_pipapo_match *clone; + int width; + bool dirty; + unsigned long last_gc; +}; + +struct nft_pipapo_elem; + +/** + * struct nft_pipapo_elem - API-facing representation of single set element + * @ext: nftables API extensions + */ +struct nft_pipapo_elem { + struct nft_set_ext ext; +}; + +int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool match_only); + +/** + * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets + * @f: Field including lookup table + * @dst: Area to store result + * @data: Input data selecting table buckets + */ +static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, + unsigned long *dst, + const u8 *data) +{ + unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); + int group; + + for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) { + u8 v; + + v = *data >> 4; + __bitmap_and(dst, dst, lt + v * f->bsize, + f->bsize * BITS_PER_LONG); + lt += f->bsize * NFT_PIPAPO_BUCKETS(4); + + v = *data & 0x0f; + __bitmap_and(dst, dst, lt + v * f->bsize, + f->bsize * BITS_PER_LONG); + lt += f->bsize * NFT_PIPAPO_BUCKETS(4); + } +} + +/** + * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets + * @f: Field including lookup table + * @dst: Area to store result + * @data: Input data selecting table buckets + */ +static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f, + unsigned long *dst, + const u8 *data) +{ + unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); + int group; + + for (group = 0; group < f->groups; group++, data++) { + __bitmap_and(dst, dst, lt + *data * f->bsize, + f->bsize * BITS_PER_LONG); + lt += f->bsize * NFT_PIPAPO_BUCKETS(8); + } +} + +/** + * pipapo_estimate_size() - Estimate worst-case for set size + * @desc: Set description, element count and field description used here + * + * The size for this set type can vary dramatically, as it depends on the number + * of rules (composing netmasks) the entries expand to. We compute the worst + * case here. + * + * In general, for a non-ranged entry or a single composing netmask, we need + * one bit in each of the NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) buckets, + * for each bit group (with the initial 8-bit groups, each input bit needs + * 32 bits of matching data), plus a bucket in the mapping table for each field. 
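Evaluated by hand, the worst case just described comes to the following for one IPv4 field (field_len = 4) with the initial 8-bit groups, assuming the 8-byte map bucket union of 64-bit builds:

    rules     = ilog2(4 * BITS_PER_BYTE) * 2                  /* =  10 */
    lt bytes  = rules * NFT_PIPAPO_BUCKETS(8) / BITS_PER_BYTE /* = 320 */
    map bytes = rules * sizeof(union nft_pipapo_map_bucket)   /* =  80 */

that is, up to 400 bytes of budget per element for this field alone; a 2-byte port field expands to 8 rules and contributes another 320 bytes.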
+ * + * Return: worst-case set size in bytes, 0 on any overflow + */ +static u64 pipapo_estimate_size(const struct nft_set_desc *desc) +{ + unsigned long entry_size; + u64 size; + int i; + + for (i = 0, entry_size = 0; i < desc->field_count; i++) { + unsigned long rules; + + if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES) + return 0; + + /* Worst-case ranges for each concatenated field: each n-bit + * field can expand to up to n * 2 rules in each bucket, and + * each rule also needs a mapping bucket. + */ + rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2; + entry_size += rules * + NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) / + BITS_PER_BYTE; + entry_size += rules * sizeof(union nft_pipapo_map_bucket); + } + + /* Rules in lookup and mapping tables are needed for each entry */ + size = desc->size * entry_size; + if (size && div_u64(size, desc->size) != entry_size) + return 0; + + size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2; + + size += sizeof(struct nft_pipapo_field) * desc->field_count; + + return size; +} + +#endif /* _NFT_SET_PIPAPO_H */ diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c new file mode 100644 index 000000000000..d65ae0e23028 --- /dev/null +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -0,0 +1,1223 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* PIPAPO: PIle PAcket POlicies: AVX2 packet lookup routines + * + * Copyright (c) 2019-2020 Red Hat GmbH + * + * Author: Stefano Brivio <sbrivio@redhat.com> + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <uapi/linux/netfilter/nf_tables.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include <linux/compiler.h> +#include <asm/fpu/api.h> + +#include "nft_set_pipapo_avx2.h" +#include "nft_set_pipapo.h" + +#define NFT_PIPAPO_LONGS_PER_M256 (XSAVE_YMM_SIZE / BITS_PER_LONG) + +/* Load from memory into YMM register with non-temporal hint ("stream load"), + * that is, don't fetch lines from memory into the cache. This avoids pushing + * precious packet data out of the cache hierarchy, and is appropriate when: + * + * - loading buckets from lookup tables, as they are not going to be used + * again before packets are entirely classified + * + * - loading the result bitmap from the previous field, as it's never used + * again + */ +#define NFT_PIPAPO_AVX2_LOAD(reg, loc) \ + asm volatile("vmovntdqa %0, %%ymm" #reg : : "m" (loc)) + +/* Stream a single lookup table bucket into YMM register given lookup table, + * group index, value of packet bits, bucket size. + */ +#define NFT_PIPAPO_AVX2_BUCKET_LOAD4(reg, lt, group, v, bsize) \ + NFT_PIPAPO_AVX2_LOAD(reg, \ + lt[((group) * NFT_PIPAPO_BUCKETS(4) + \ + (v)) * (bsize)]) +#define NFT_PIPAPO_AVX2_BUCKET_LOAD8(reg, lt, group, v, bsize) \ + NFT_PIPAPO_AVX2_LOAD(reg, \ + lt[((group) * NFT_PIPAPO_BUCKETS(8) + \ + (v)) * (bsize)]) + +/* Bitwise AND: the staple operation of this algorithm */ +#define NFT_PIPAPO_AVX2_AND(dst, a, b) \ + asm volatile("vpand %ymm" #a ", %ymm" #b ", %ymm" #dst) + +/* Jump to label if @reg is zero */ +#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ + asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + "je %l[" #label "]" : : : : label) + +/* Store 256 bits from YMM register into memory. 
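The lookup routines below drive these macros in long interleaved LOAD/AND chains. One round, restated with AVX2 intrinsics purely for readability (a sketch: the kernel issues the fixed-register asm macros defined above instead; lt, bsize and the g/v pairs stand for the field's table, bucket size in longs, and two group/value pairs):

    #include <immintrin.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* Stream-load two 4-bit-group buckets, intersect them, and test
     * whether any rule candidate survives in this 256-bit slice.
     * Build with -mavx2; lt must be 32-byte aligned (NFT_PIPAPO_ALIGN).
     */
    static bool slice_matches(unsigned long *lt, int g0, unsigned int v0,
                              int g1, unsigned int v1, size_t bsize)
    {
            __m256i a, b, m;

            a = _mm256_stream_load_si256((__m256i *)&lt[((size_t)g0 * 16 + v0) * bsize]);
            b = _mm256_stream_load_si256((__m256i *)&lt[((size_t)g1 * 16 + v1) * bsize]);
            m = _mm256_and_si256(a, b);

            return !_mm256_testz_si256(m, m);
    }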
Contrary to bucket load + * operation, we don't bypass the cache here, as stored matching results + * are always used shortly after. + */ +#define NFT_PIPAPO_AVX2_STORE(loc, reg) \ + asm volatile("vmovdqa %%ymm" #reg ", %0" : "=m" (loc)) + +/* Zero out a complete YMM register, @reg */ +#define NFT_PIPAPO_AVX2_ZERO(reg) \ + asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) + +/* Current working bitmap index, toggled between field matches */ +static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); + +/** + * nft_pipapo_avx2_prepare() - Prepare before main algorithm body + * + * This zeroes out ymm15, which is later used whenever we need to clear a + * memory location, by storing its content into memory. + */ +static void nft_pipapo_avx2_prepare(void) +{ + NFT_PIPAPO_AVX2_ZERO(15); +} + +/** + * nft_pipapo_avx2_fill() - Fill a bitmap region with ones + * @data: Base memory area + * @start: First bit to set + * @len: Count of bits to fill + * + * This is nothing else than a version of bitmap_set(), as used e.g. by + * pipapo_refill(), tailored for the microarchitectures using it and better + * suited for the specific usage: it's very likely that we'll set a small number + * of bits, not crossing a word boundary, and correct branch prediction is + * critical here. + * + * This function doesn't actually use any AVX2 instruction. + */ +static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len) +{ + int offset = start % BITS_PER_LONG; + unsigned long mask; + + data += start / BITS_PER_LONG; + + if (likely(len == 1)) { + *data |= BIT(offset); + return; + } + + if (likely(len < BITS_PER_LONG || offset)) { + if (likely(len + offset <= BITS_PER_LONG)) { + *data |= GENMASK(len - 1 + offset, offset); + return; + } + + *data |= ~0UL << offset; + len -= BITS_PER_LONG - offset; + data++; + + if (len <= BITS_PER_LONG) { + mask = ~0UL >> (BITS_PER_LONG - len); + *data |= mask; + return; + } + } + + memset(data, 0xff, len / BITS_PER_BYTE); + data += len / BITS_PER_LONG; + + len %= BITS_PER_LONG; + if (len) + *data |= ~0UL >> (BITS_PER_LONG - len); +} + +/** + * nft_pipapo_avx2_refill() - Scan bitmap, select mapping table item, set bits + * @offset: Start from given bitmap (equivalent to bucket) offset, in longs + * @map: Bitmap to be scanned for set bits + * @dst: Destination bitmap + * @mt: Mapping table containing bit set specifiers + * @len: Length of bitmap in longs + * @last: Return index of first set bit, if this is the last field + * + * This is an alternative implementation of pipapo_refill() suitable for usage + * with AVX2 lookup routines: we know there are four words to be scanned, at + * a given offset inside the map, for each matching iteration. + * + * This function doesn't actually use any AVX2 instruction. + * + * Return: first set bit index if @last, index of first filled word otherwise. 
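nft_pipapo_avx2_fill() above is behaviourally bitmap_set() with the short, word-local runs fast-pathed, since those dominate during refill. A quick check of the common single-word case (sketch):

    unsigned long dst[2] = { 0, 0 };

    nft_pipapo_avx2_fill(dst, 5, 3); /* same effect as bitmap_set(dst, 5, 3) */
    /* dst[0] == 0xe0UL: bits 5..7 set, via the len + offset <= BITS_PER_LONG path */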
+ */ +static int nft_pipapo_avx2_refill(int offset, unsigned long *map, + unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool last) +{ + int ret = -1; + +#define NFT_PIPAPO_AVX2_REFILL_ONE_WORD(x) \ + do { \ + while (map[(x)]) { \ + int r = __builtin_ctzl(map[(x)]); \ + int i = (offset + (x)) * BITS_PER_LONG + r; \ + \ + if (last) \ + return i; \ + \ + nft_pipapo_avx2_fill(dst, mt[i].to, mt[i].n); \ + \ + if (ret == -1) \ + ret = mt[i].to; \ + \ + map[(x)] &= ~(1UL << r); \ + } \ + } while (0) + + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(0); + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(1); + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(2); + NFT_PIPAPO_AVX2_REFILL_ONE_WORD(3); +#undef NFT_PIPAPO_AVX2_REFILL_ONE_WORD + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_2() - AVX2-based lookup for 2 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * Load buckets from lookup table corresponding to the values of each 4-bit + * group of packet bytes, and perform a bitwise intersection between them. If + * this is the first field in the set, simply AND the buckets together + * (equivalent to using an all-ones starting bitmap), use the provided starting + * bitmap otherwise. Then call nft_pipapo_avx2_refill() to generate the next + * working bitmap, @fill. + * + * This is used for 8-bit fields (i.e. protocol numbers). + * + * Out-of-order (and superscalar) execution is vital here, so it's critical to + * avoid false data dependencies. CPU and compiler could (mostly) take care of + * this on their own, but the operation ordering is explicitly given here with + * a likely execution order in mind, to highlight possible stalls. That's why + * a number of logically distinct operations (i.e. loading buckets, intersecting + * buckets) are interleaved. + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
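The scan in NFT_PIPAPO_AVX2_REFILL_ONE_WORD() above is the classic lowest-set-bit loop; isolated for readability (sketch, with w standing for one of the four map words):

    while (w) {
            int r = __builtin_ctzl(w);                /* lowest set bit */
            int i = (offset + x) * BITS_PER_LONG + r; /* global rule index */

            /* last field: return i; else fill mt[i].to / mt[i].n in dst */
            w &= ~(1UL << r);
    }

Unrolling it exactly four times matches the rest of the file: every AVX2 routine processes one 256-bit block, i.e. four longs of match bitmap, per iteration.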
+ */ +static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf }; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_LOAD(2, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nothing); + NFT_PIPAPO_AVX2_AND(3, 0, 1); + NFT_PIPAPO_AVX2_AND(4, 2, 3); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 4); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_4() - AVX2-based lookup for 4 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 16-bit fields (i.e. ports). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
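For reading the asm in these loops, the operand address behind NFT_PIPAPO_AVX2_BUCKET_LOAD4() for group g and packet nibble v, restated as plain C (hypothetical helper):

    static inline unsigned long *bucket4(unsigned long *lt, int g, u8 v,
                                         size_t bsize)
    {
            /* NFT_PIPAPO_BUCKETS(4) == 16 buckets per 4-bit group */
            return lt + ((size_t)g * 16 + v) * bsize;
    }

Each vmovntdqa then streams 256 bits from that bucket, NFT_PIPAPO_LONGS_PER_M256 longs at a time, without displacing packet data from the cache.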
+ */ +static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf }; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_AND(7, 4, 5); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_AND(5, 0, 1); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_AND(7, 4, 5); + /* Stall */ + NFT_PIPAPO_AVX2_AND(7, 6, 7); + } + + /* Stall */ + NFT_PIPAPO_AVX2_NOMATCH_GOTO(7, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 7); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_8() - AVX2-based lookup for 8 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 32-bit fields (i.e. IPv4 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
+ */ +static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, + }; + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(8, 2, 3); + NFT_PIPAPO_AVX2_AND(9, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(11, 6, 7); + NFT_PIPAPO_AVX2_AND(12, 8, 9); + NFT_PIPAPO_AVX2_AND(13, 10, 11); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(1, 12, 13); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_AND(8, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(10, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(12, 6, 7); + NFT_PIPAPO_AVX2_AND(13, 8, 9); + NFT_PIPAPO_AVX2_AND(14, 10, 11); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(1, 12, 13); + NFT_PIPAPO_AVX2_AND(1, 1, 14); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 1); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_12() - AVX2-based lookup for 12 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 48-bit fields (i.e. MAC addresses/EUI-48). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
+ */ +static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, + pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf, + }; + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (!first) + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + + if (!first) { + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(1, 1, 0); + } + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(9, 1, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(11, 5, 6); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 8, pg[8], bsize); + NFT_PIPAPO_AVX2_AND(13, 7, 8); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 9, pg[9], bsize); + + NFT_PIPAPO_AVX2_AND(0, 9, 10); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 10, pg[10], bsize); + NFT_PIPAPO_AVX2_AND(2, 11, 12); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize); + NFT_PIPAPO_AVX2_AND(4, 13, 14); + NFT_PIPAPO_AVX2_AND(5, 0, 1); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + + /* Stalls */ + NFT_PIPAPO_AVX2_AND(7, 4, 5); + NFT_PIPAPO_AVX2_AND(8, 6, 7); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(8, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 8); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_4b_32() - AVX2-based lookup for 32 four-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 128-bit fields (i.e. IPv6 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
+ */ +static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, + pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, + pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf, + pkt[6] >> 4, pkt[6] & 0xf, pkt[7] >> 4, pkt[7] & 0xf, + pkt[8] >> 4, pkt[8] & 0xf, pkt[9] >> 4, pkt[9] & 0xf, + pkt[10] >> 4, pkt[10] & 0xf, pkt[11] >> 4, pkt[11] & 0xf, + pkt[12] >> 4, pkt[12] & 0xf, pkt[13] >> 4, pkt[13] & 0xf, + pkt[14] >> 4, pkt[14] & 0xf, pkt[15] >> 4, pkt[15] & 0xf, + }; + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (!first) + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize); + if (!first) { + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(1, 1, 0); + } + + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize); + NFT_PIPAPO_AVX2_AND(8, 1, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize); + NFT_PIPAPO_AVX2_AND(10, 5, 6); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize); + NFT_PIPAPO_AVX2_AND(12, 7, 8); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(13, lt, 8, pg[8], bsize); + NFT_PIPAPO_AVX2_AND(14, 9, 10); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 9, pg[9], bsize); + NFT_PIPAPO_AVX2_AND(1, 11, 12); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 10, pg[10], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize); + NFT_PIPAPO_AVX2_AND(4, 13, 14); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 12, pg[12], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 13, pg[13], bsize); + NFT_PIPAPO_AVX2_AND(7, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 14, pg[14], bsize); + NFT_PIPAPO_AVX2_AND(9, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 15, pg[15], bsize); + NFT_PIPAPO_AVX2_AND(11, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 16, pg[16], bsize); + NFT_PIPAPO_AVX2_AND(13, 6, 7); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 17, pg[17], bsize); + + NFT_PIPAPO_AVX2_AND(0, 8, 9); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 18, pg[18], bsize); + NFT_PIPAPO_AVX2_AND(2, 10, 11); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 19, pg[19], bsize); + NFT_PIPAPO_AVX2_AND(4, 12, 13); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 20, pg[20], bsize); + NFT_PIPAPO_AVX2_AND(6, 14, 0); + NFT_PIPAPO_AVX2_AND(7, 1, 2); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 21, pg[21], bsize); + NFT_PIPAPO_AVX2_AND(9, 3, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 22, pg[22], bsize); + NFT_PIPAPO_AVX2_AND(11, 5, 6); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 23, pg[23], bsize); + NFT_PIPAPO_AVX2_AND(13, 7, 8); + + NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 24, pg[24], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 25, pg[25], bsize); + NFT_PIPAPO_AVX2_AND(1, 9, 10); + NFT_PIPAPO_AVX2_AND(2, 11, 12); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 26, pg[26], bsize); + NFT_PIPAPO_AVX2_AND(4, 13, 14); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 27, pg[27], bsize); + NFT_PIPAPO_AVX2_AND(6, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 28, pg[28], bsize); + 
NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 29, pg[29], bsize); + NFT_PIPAPO_AVX2_AND(9, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 30, pg[30], bsize); + NFT_PIPAPO_AVX2_AND(11, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 31, pg[31], bsize); + + NFT_PIPAPO_AVX2_AND(0, 6, 7); + NFT_PIPAPO_AVX2_AND(1, 8, 9); + NFT_PIPAPO_AVX2_AND(2, 10, 11); + NFT_PIPAPO_AVX2_AND(3, 12, 0); + + /* Stalls */ + NFT_PIPAPO_AVX2_AND(4, 1, 2); + NFT_PIPAPO_AVX2_AND(5, 3, 4); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(5, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 5); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_1() - AVX2-based lookup for one eight-bit group + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 8-bit fields (i.e. protocol numbers). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). + */ +static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 0, pkt[0], bsize); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_AND(2, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 2); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_2() - AVX2-based lookup for 2 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 16-bit fields (i.e. ports). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
+ */ +static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + } else { + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(3, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(4, 3, 2); + } + + /* Stall */ + NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 4); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_4() - AVX2-based lookup for 4 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 32-bit fields (i.e. IPv4 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
+ */ +static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + NFT_PIPAPO_AVX2_AND(6, 2, 3); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(7, 4, 5); + NFT_PIPAPO_AVX2_AND(0, 6, 7); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 0); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_6() - AVX2-based lookup for 6 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 48-bit fields (i.e. MAC addresses/EUI-48). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
+ */ +static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (first) { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize); + NFT_PIPAPO_AVX2_AND(7, 2, 3); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_AND(1, 6, 7); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + } else { + NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize); + + NFT_PIPAPO_AVX2_AND(5, 0, 1); + NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 4, pkt[4], bsize); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 5, pkt[5], bsize); + NFT_PIPAPO_AVX2_AND(2, 6, 7); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(3, 0, 1); + NFT_PIPAPO_AVX2_AND(4, 2, 3); + } + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 4); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_8b_16() - AVX2-based lookup for 16 eight-bit groups + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * See nft_pipapo_avx2_lookup_4b_2(). + * + * This is used for 128-bit fields (i.e. IPv6 addresses). + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
+ */ +static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; + unsigned long *lt = f->lt, bsize = f->bsize; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) { + int i_ul = i * NFT_PIPAPO_LONGS_PER_M256; + + if (!first) + NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]); + + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize); + if (!first) { + NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing); + NFT_PIPAPO_AVX2_AND(1, 1, 0); + } + NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize); + + NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 4, pkt[4], bsize); + NFT_PIPAPO_AVX2_AND(6, 1, 2); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 5, pkt[5], bsize); + NFT_PIPAPO_AVX2_AND(0, 3, 4); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 6, pkt[6], bsize); + + NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 7, pkt[7], bsize); + NFT_PIPAPO_AVX2_AND(3, 5, 6); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize); + + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize); + NFT_PIPAPO_AVX2_AND(2, 6, 7); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 12, pkt[12], bsize); + NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 13, pkt[13], bsize); + NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 14, pkt[14], bsize); + NFT_PIPAPO_AVX2_AND(2, 6, 7); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 15, pkt[15], bsize); + NFT_PIPAPO_AVX2_AND(4, 0, 1); + + /* Stall */ + NFT_PIPAPO_AVX2_AND(5, 2, 3); + NFT_PIPAPO_AVX2_AND(6, 4, 5); + + NFT_PIPAPO_AVX2_NOMATCH_GOTO(6, nomatch); + NFT_PIPAPO_AVX2_STORE(map[i_ul], 6); + + b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last); + if (last) + return b; + + if (unlikely(ret == -1)) + ret = b / XSAVE_YMM_SIZE; + + continue; + +nomatch: + NFT_PIPAPO_AVX2_STORE(map[i_ul], 15); +nothing: + ; + } + + return ret; +} + +/** + * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes + * @map: Previous match result, used as initial bitmap + * @fill: Destination bitmap to be filled with current match result + * @f: Field, containing lookup and mapping tables + * @offset: Ignore buckets before the given index, no bits are filled there + * @pkt: Packet data, pointer to input nftables register + * @first: If this is the first field, don't source previous result + * @last: Last field: stop at the first match and return bit index + * + * This function should never be called, but is provided for the case the field + * size doesn't match any of the known data types. Matching rate is + * substantially lower than AVX2 routines. + * + * Return: -1 on no match, rule index of match if @last, otherwise first long + * word index to be checked next (i.e. first filled word). 
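Taken together, the specialised routines cover exactly the field widths nftables concatenations produce, for both group sizes; everything else takes the scalar fallback below. This is the dispatch that nft_pipapo_avx2_lookup() performs further down:

    /* field width            8-bit groups    4-bit groups
     *   8 bits (proto)       ..._8b_1        ..._4b_2
     *  16 bits (port)        ..._8b_2        ..._4b_4
     *  32 bits (IPv4)        ..._8b_4        ..._4b_8
     *  48 bits (MAC)         ..._8b_6        ..._4b_12
     * 128 bits (IPv6)        ..._8b_16       ..._4b_32
     *  anything else         nft_pipapo_avx2_lookup_slow()
     */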
+ */ +static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, + struct nft_pipapo_field *f, int offset, + const u8 *pkt, bool first, bool last) +{ + unsigned long *lt = f->lt, bsize = f->bsize; + int i, ret = -1, b; + + lt += offset * NFT_PIPAPO_LONGS_PER_M256; + + if (first) + memset(map, 0xff, bsize * sizeof(*map)); + + for (i = offset; i < bsize; i++) { + if (f->bb == 8) + pipapo_and_field_buckets_8bit(f, map, pkt); + else + pipapo_and_field_buckets_4bit(f, map, pkt); + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + + b = pipapo_refill(map, bsize, f->rules, fill, f->mt, last); + + if (last) + return b; + + if (ret == -1) + ret = b / XSAVE_YMM_SIZE; + } + + return ret; +} + +/** + * nft_pipapo_avx2_estimate() - Set size, space and lookup complexity + * @desc: Set description, element count and field description used + * @features: Flags: NFT_SET_INTERVAL needs to be there + * @est: Storage for estimation data + * + * Return: true if set is compatible and AVX2 available, false otherwise. + */ +bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + if (!(features & NFT_SET_INTERVAL) || + desc->field_count < NFT_PIPAPO_MIN_FIELDS) + return false; + + if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AVX)) + return false; + + est->size = pipapo_estimate_size(desc); + if (!est->size) + return false; + + est->lookup = NFT_SET_CLASS_O_LOG_N; + + est->space = NFT_SET_CLASS_O_N; + + return true; +} + +/** + * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation + * @net: Network namespace + * @set: nftables API set representation + * @elem: nftables API element representation containing key data + * @ext: nftables API extension pointer, filled with matching reference + * + * For more details, see DOC: Theory of Operation in nft_set_pipapo.c. + * + * This implementation exploits the repetitive characteristic of the algorithm + * to provide a fast, vectorised version using the AVX2 SIMD instruction set. + * + * Return: true on match, false otherwise. + */ +bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_pipapo *priv = nft_set_priv(set); + unsigned long *res, *fill, *scratch; + u8 genmask = nft_genmask_cur(net); + const u8 *rp = (const u8 *)key; + struct nft_pipapo_match *m; + struct nft_pipapo_field *f; + bool map_index; + int i, ret = 0; + + m = rcu_dereference(priv->match); + + /* This also protects access to all data related to scratch maps */ + kernel_fpu_begin(); + + scratch = *raw_cpu_ptr(m->scratch_aligned); + if (unlikely(!scratch)) { + kernel_fpu_end(); + return false; + } + map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); + + res = scratch + (map_index ? m->bsize_max : 0); + fill = scratch + (map_index ? 
0 : m->bsize_max); + + /* Starting map doesn't need to be set for this implementation */ + + nft_pipapo_avx2_prepare(); + +next_match: + nft_pipapo_for_each_field(f, i, m) { + bool last = i == m->field_count - 1, first = !i; + +#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \ + (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \ + ret, rp, \ + first, last)) + + if (likely(f->bb == 8)) { + if (f->groups == 1) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 1); + } else if (f->groups == 2) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 2); + } else if (f->groups == 4) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 4); + } else if (f->groups == 6) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 6); + } else if (f->groups == 16) { + NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); + } else { + ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret, rp, + first, last); + } + } else { + if (f->groups == 2) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 2); + } else if (f->groups == 4) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 4); + } else if (f->groups == 8) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 8); + } else if (f->groups == 12) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 12); + } else if (f->groups == 32) { + NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); + } else { + ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret, rp, + first, last); + } + } + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + +#undef NFT_SET_PIPAPO_AVX2_LOOKUP + + if (ret < 0) + goto out; + + if (last) { + *ext = &f->mt[ret].e->ext; + if (unlikely(nft_set_elem_expired(*ext) || + !nft_set_elem_active(*ext, genmask))) { + ret = 0; + goto next_match; + } + + goto out; + } + + swap(res, fill); + rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); + } + +out: + if (i % 2) + raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); + kernel_fpu_end(); + + return ret >= 0; +} diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h new file mode 100644 index 000000000000..396caf7bfca8 --- /dev/null +++ b/net/netfilter/nft_set_pipapo_avx2.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _NFT_SET_PIPAPO_AVX2_H + +#ifdef CONFIG_AS_AVX2 +#include <asm/fpu/xstate.h> +#define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE) + +bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext); +bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est); +#endif /* CONFIG_AS_AVX2 */ + +#endif /* _NFT_SET_PIPAPO_AVX2_H */ diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 5000b938ab1e..172ef8189f99 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -481,8 +481,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } -struct nft_set_type nft_set_rbtree_type __read_mostly = { - .owner = THIS_MODULE, +const struct nft_set_type nft_set_rbtree_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { .privsize = nft_rbtree_privsize, diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 764e88682a81..30be5787fbde 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -11,6 +11,7 @@ #include <net/ip_tunnels.h> #include <net/vxlan.h> #include <net/erspan.h> +#include <net/geneve.h> struct nft_tunnel { enum nft_tunnel_keys key:8; @@ -144,6 +145,7 @@ struct nft_tunnel_opts { union { struct vxlan_metadata vxlan; struct erspan_metadata erspan; + u8 data[IP_TUNNEL_OPTS_MAX]; } u; u32 len; __be16 flags; @@ -301,9 +303,53 @@ static int 
nft_tunnel_obj_erspan_init(const struct nlattr *attr, return 0; } +static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = { + [NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 }, + [NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 }, + [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 }, +}; + +static int nft_tunnel_obj_geneve_init(const struct nlattr *attr, + struct nft_tunnel_opts *opts) +{ + struct geneve_opt *opt = (struct geneve_opt *)(opts->u.data + opts->len); + struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1]; + int err, data_len; + + err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_GENEVE_MAX, attr, + nft_tunnel_opts_geneve_policy, NULL); + if (err < 0) + return err; + + if (!tb[NFTA_TUNNEL_KEY_GENEVE_CLASS] || + !tb[NFTA_TUNNEL_KEY_GENEVE_TYPE] || + !tb[NFTA_TUNNEL_KEY_GENEVE_DATA]) + return -EINVAL; + + attr = tb[NFTA_TUNNEL_KEY_GENEVE_DATA]; + data_len = nla_len(attr); + if (data_len % 4) + return -EINVAL; + + opts->len += sizeof(*opt) + data_len; + if (opts->len > IP_TUNNEL_OPTS_MAX) + return -EINVAL; + + memcpy(opt->opt_data, nla_data(attr), data_len); + opt->length = data_len / 4; + opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]); + opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]); + opts->flags = TUNNEL_GENEVE_OPT; + + return 0; +} + static const struct nla_policy nft_tunnel_opts_policy[NFTA_TUNNEL_KEY_OPTS_MAX + 1] = { + [NFTA_TUNNEL_KEY_OPTS_UNSPEC] = { + .strict_start_type = NFTA_TUNNEL_KEY_OPTS_GENEVE }, [NFTA_TUNNEL_KEY_OPTS_VXLAN] = { .type = NLA_NESTED, }, [NFTA_TUNNEL_KEY_OPTS_ERSPAN] = { .type = NLA_NESTED, }, + [NFTA_TUNNEL_KEY_OPTS_GENEVE] = { .type = NLA_NESTED, }, }; static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, @@ -311,22 +357,43 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, struct ip_tunnel_info *info, struct nft_tunnel_opts *opts) { - struct nlattr *tb[NFTA_TUNNEL_KEY_OPTS_MAX + 1]; - int err; + int err, rem, type = 0; + struct nlattr *nla; - err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr, - nft_tunnel_opts_policy, NULL); + err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX, + nft_tunnel_opts_policy, NULL); if (err < 0) return err; - if (tb[NFTA_TUNNEL_KEY_OPTS_VXLAN]) { - err = nft_tunnel_obj_vxlan_init(tb[NFTA_TUNNEL_KEY_OPTS_VXLAN], - opts); - } else if (tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN]) { - err = nft_tunnel_obj_erspan_init(tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN], - opts); - } else { - return -EOPNOTSUPP; + nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(nla)) { + case NFTA_TUNNEL_KEY_OPTS_VXLAN: + if (type) + return -EINVAL; + err = nft_tunnel_obj_vxlan_init(nla, opts); + if (err) + return err; + type = TUNNEL_VXLAN_OPT; + break; + case NFTA_TUNNEL_KEY_OPTS_ERSPAN: + if (type) + return -EINVAL; + err = nft_tunnel_obj_erspan_init(nla, opts); + if (err) + return err; + type = TUNNEL_ERSPAN_OPT; + break; + case NFTA_TUNNEL_KEY_OPTS_GENEVE: + if (type && type != TUNNEL_GENEVE_OPT) + return -EINVAL; + err = nft_tunnel_obj_geneve_init(nla, opts); + if (err) + return err; + type = TUNNEL_GENEVE_OPT; + break; + default: + return -EOPNOTSUPP; + } } return err; @@ -518,6 +585,25 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, break; } nla_nest_end(skb, inner); + } else if (opts->flags & TUNNEL_GENEVE_OPT) { + struct geneve_opt *opt; + int offset = 0; + + inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); + if (!inner) + goto failure; + while (opts->len > offset) { + opt = 
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index f56d3ed93e56..75bd0e5dd312 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -18,6 +18,7 @@
 
 #include <linux/module.h>
 #include <linux/timer.h>
+#include <linux/alarmtimer.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/netfilter.h>
@@ -30,6 +31,7 @@
 
 struct idletimer_tg {
 	struct list_head entry;
+	struct alarm alarm;
 	struct timer_list timer;
 	struct work_struct work;
 
@@ -37,6 +39,7 @@ struct idletimer_tg {
 	struct device_attribute attr;
 
 	unsigned int refcnt;
+	u8 timer_type;
 };
 
 static LIST_HEAD(idletimer_tg_list);
@@ -62,20 +65,29 @@ static ssize_t idletimer_tg_show(struct device *dev,
 {
 	struct idletimer_tg *timer;
 	unsigned long expires = 0;
+	struct timespec64 ktimespec = {};
+	long time_diff = 0;
 
 	mutex_lock(&list_mutex);
 
 	timer =	__idletimer_tg_find_by_label(attr->attr.name);
-	if (timer)
-		expires = timer->timer.expires;
+	if (timer) {
+		if (timer->timer_type & XT_IDLETIMER_ALARM) {
+			ktime_t expires_alarm = alarm_expires_remaining(&timer->alarm);
+			ktimespec = ktime_to_timespec64(expires_alarm);
+			time_diff = ktimespec.tv_sec;
+		} else {
+			expires = timer->timer.expires;
+			time_diff = jiffies_to_msecs(expires - jiffies) / 1000;
+		}
+	}
 
 	mutex_unlock(&list_mutex);
 
-	if (time_after(expires, jiffies))
-		return sprintf(buf, "%u\n",
-			       jiffies_to_msecs(expires - jiffies) / 1000);
+	if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
+		return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
 
-	return sprintf(buf, "0\n");
+	return snprintf(buf, PAGE_SIZE, "0\n");
 }
 
 static void idletimer_tg_work(struct work_struct *work)
@@ -95,6 +107,16 @@ static void idletimer_tg_expired(struct timer_list *t)
 	schedule_work(&timer->work);
 }
 
+static enum alarmtimer_restart idletimer_tg_alarmproc(struct alarm *alarm,
+						      ktime_t now)
+{
+	struct idletimer_tg *timer = alarm->data;
+
+	pr_debug("alarm %s expired\n", timer->attr.attr.name);
+	schedule_work(&timer->work);
+	return ALARMTIMER_NORESTART;
+}
+
 static int idletimer_check_sysfs_name(const char *name, unsigned int size)
 {
 	int ret;
@@ -160,6 +182,68 @@ out:
 	return ret;
 }
 
+static int idletimer_tg_create_v1(struct idletimer_tg_info_v1 *info)
+{
+	int ret;
+
+	info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+	if (!info->timer) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+	if (ret < 0)
+		goto out_free_timer;
+
+	sysfs_attr_init(&info->timer->attr.attr);
+	info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
+	if (!info->timer->attr.attr.name) {
+		ret = -ENOMEM;
+		goto out_free_timer;
+	}
+	info->timer->attr.attr.mode = 0444;
+	info->timer->attr.show = idletimer_tg_show;
+
+	ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
+	if (ret < 0) {
+		pr_debug("couldn't add file to sysfs\n");
+		goto out_free_attr;
+	}
+
+	/* notify userspace */
+	kobject_uevent(idletimer_tg_kobj, KOBJ_ADD);
+
+	list_add(&info->timer->entry, &idletimer_tg_list);
+	pr_debug("timer type value is %u\n", info->timer_type);
+	info->timer->timer_type = info->timer_type;
+	info->timer->refcnt = 1;
+
+	INIT_WORK(&info->timer->work, idletimer_tg_work);
+
+	if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+		ktime_t tout;
+		alarm_init(&info->timer->alarm, ALARM_BOOTTIME,
+			   idletimer_tg_alarmproc);
+		info->timer->alarm.data = info->timer;
+		tout = ktime_set(info->timeout, 0);
+		alarm_start_relative(&info->timer->alarm, tout);
+	} else {
+		timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
+		mod_timer(&info->timer->timer,
+			  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+	}
+
+	return 0;
+
+out_free_attr:
+	kfree(info->timer->attr.attr.name);
+out_free_timer:
+	kfree(info->timer);
+out:
+	return ret;
+}
+
 /*
  * The actual xt_tables plugin.
  */
@@ -177,13 +261,30 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,
 	return XT_CONTINUE;
 }
 
-static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
+/*
+ * The actual xt_tables plugin.
+ */
+static unsigned int idletimer_tg_target_v1(struct sk_buff *skb,
+					   const struct xt_action_param *par)
 {
-	struct idletimer_tg_info *info = par->targinfo;
-	int ret;
+	const struct idletimer_tg_info_v1 *info = par->targinfo;
 
-	pr_debug("checkentry targinfo%s\n", info->label);
+	pr_debug("resetting timer %s, timeout period %u\n",
+		 info->label, info->timeout);
+
+	if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+		ktime_t tout = ktime_set(info->timeout, 0);
+		alarm_start_relative(&info->timer->alarm, tout);
+	} else {
+		mod_timer(&info->timer->timer,
+			  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+	}
 
+	return XT_CONTINUE;
+}
+
+static int idletimer_tg_helper(struct idletimer_tg_info *info)
+{
 	if (info->timeout == 0) {
 		pr_debug("timeout value is zero\n");
 		return -EINVAL;
@@ -198,7 +299,22 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
 		pr_debug("label is empty or not nul-terminated\n");
 		return -EINVAL;
 	}
+	return 0;
+}
 
+static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
+{
+	struct idletimer_tg_info *info = par->targinfo;
+	int ret;
+
+	pr_debug("checkentry targinfo%s\n", info->label);
+
+	ret = idletimer_tg_helper(info);
+	if (ret < 0) {
+		pr_debug("checkentry helper return invalid\n");
+		return -EINVAL;
+	}
 	mutex_lock(&list_mutex);
 
 	info->timer = __idletimer_tg_find_by_label(info->label);
@@ -222,6 +339,64 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
 	return 0;
 }
 
+static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par)
+{
+	struct idletimer_tg_info_v1 *info = par->targinfo;
+	int ret;
+
+	pr_debug("checkentry targinfo%s\n", info->label);
+
+	ret = idletimer_tg_helper((struct idletimer_tg_info *)info);
+	if (ret < 0) {
+		pr_debug("checkentry helper return invalid\n");
+		return -EINVAL;
+	}
+
+	if (info->timer_type > XT_IDLETIMER_ALARM) {
+		pr_debug("invalid value for timer type\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&list_mutex);
+
+	info->timer = __idletimer_tg_find_by_label(info->label);
+	if (info->timer) {
+		if (info->timer->timer_type != info->timer_type) {
+			pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n");
+			mutex_unlock(&list_mutex);
+			return -EINVAL;
+		}
+
+		info->timer->refcnt++;
+		if (info->timer_type & XT_IDLETIMER_ALARM) {
+			/* calculate remaining expiry time */
+			ktime_t tout = alarm_expires_remaining(&info->timer->alarm);
+			struct timespec64 ktimespec = ktime_to_timespec64(tout);
+
+			if (ktimespec.tv_sec > 0) {
+				pr_debug("time_expiry_remaining %lld\n",
+					 ktimespec.tv_sec);
+				alarm_start_relative(&info->timer->alarm, tout);
+			}
+		} else {
+			mod_timer(&info->timer->timer,
+				  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+		}
+		pr_debug("increased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	} else {
+		ret = idletimer_tg_create_v1(info);
+		if (ret < 0) {
+			pr_debug("failed to create timer\n");
+			mutex_unlock(&list_mutex);
+			return ret;
+		}
+	}
+
+	mutex_unlock(&list_mutex);
+	return 0;
+}
+
 static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	const struct idletimer_tg_info *info = par->targinfo;
@@ -247,7 +423,38 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
 	mutex_unlock(&list_mutex);
 }
 
-static struct xt_target idletimer_tg __read_mostly = {
+static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par)
+{
+	const struct idletimer_tg_info_v1 *info = par->targinfo;
+
+	pr_debug("destroy targinfo %s\n", info->label);
+
+	mutex_lock(&list_mutex);
+
+	if (--info->timer->refcnt == 0) {
+		pr_debug("deleting timer %s\n", info->label);
+
+		list_del(&info->timer->entry);
+		if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
+			alarm_cancel(&info->timer->alarm);
+		} else {
+			del_timer_sync(&info->timer->timer);
+		}
+		cancel_work_sync(&info->timer->work);
+		sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+		kfree(info->timer->attr.attr.name);
+		kfree(info->timer);
+	} else {
+		pr_debug("decreased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	}
+
+	mutex_unlock(&list_mutex);
+}
+
+static struct xt_target idletimer_tg[] __read_mostly = {
+	{
 	.name		= "IDLETIMER",
 	.family		= NFPROTO_UNSPEC,
 	.target		= idletimer_tg_target,
@@ -256,6 +463,18 @@ static struct xt_target idletimer_tg __read_mostly = {
 	.checkentry	= idletimer_tg_checkentry,
 	.destroy        = idletimer_tg_destroy,
 	.me		= THIS_MODULE,
+	},
+	{
+	.name		= "IDLETIMER",
+	.family		= NFPROTO_UNSPEC,
+	.revision	= 1,
+	.target		= idletimer_tg_target_v1,
+	.targetsize	= sizeof(struct idletimer_tg_info_v1),
+	.usersize	= offsetof(struct idletimer_tg_info_v1, timer),
+	.checkentry	= idletimer_tg_checkentry_v1,
+	.destroy        = idletimer_tg_destroy_v1,
+	.me		= THIS_MODULE,
+	},
 };
 
 static struct class *idletimer_tg_class;
@@ -283,7 +504,7 @@ static int __init idletimer_tg_init(void)
 
 	idletimer_tg_kobj = &idletimer_tg_device->kobj;
 
-	err = xt_register_target(&idletimer_tg);
+	err = xt_register_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg));
 	if (err < 0) {
 		pr_debug("couldn't register xt target\n");
 		goto out_dev;
@@ -300,7 +522,7 @@ out:
 
 static void __exit idletimer_tg_exit(void)
 {
-	xt_unregister_target(&idletimer_tg);
+	xt_unregister_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg));
 
 	device_destroy(idletimer_tg_class, MKDEV(0, 0));
 	class_destroy(idletimer_tg_class);
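The revision 1 target can arm an ALARM_BOOTTIME alarm instead of a jiffies timer, so the countdown keeps running across system suspend. Userspace can observe the same split with timerfd; the sketch below assumes a kernel that supports CLOCK_BOOTTIME_ALARM and a caller with CAP_WAKE_ALARM, and falls back to CLOCK_MONOTONIC otherwise.

    /* Sketch: the userspace analogue of the kernel's timer vs. alarm split.
     * CLOCK_MONOTONIC stops across suspend; CLOCK_BOOTTIME_ALARM keeps
     * counting and can wake the system (requires CAP_WAKE_ALARM). */
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/timerfd.h>

    int main(void)
    {
    	struct itimerspec its = { .it_value = { .tv_sec = 2 } };
    	uint64_t expirations;
    	int fd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);

    	if (fd < 0) {	/* e.g. no CAP_WAKE_ALARM: use a plain timer */
    		perror("timerfd_create(CLOCK_BOOTTIME_ALARM)");
    		fd = timerfd_create(CLOCK_MONOTONIC, 0);
    	}
    	if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0) {
    		perror("timerfd");
    		return 1;
    	}

    	if (read(fd, &expirations, sizeof(expirations)) > 0)	/* blocks ~2s */
    		printf("timer fired %llu time(s)\n",
    		       (unsigned long long)expirations);
    	close(fd);
    	return 0;
    }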
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 2317721f3ecb..75625d13e976 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -21,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: packet security mark modification");
 MODULE_ALIAS("ipt_SECMARK");
 MODULE_ALIAS("ip6t_SECMARK");
 
-#define PFX "SECMARK: "
-
 static u8 mode;
 
 static unsigned int
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 8c835ad63729..9c5cfd74a0ee 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -132,7 +132,7 @@ struct xt_hashlimit_htable {
 	const char *name;
 	struct net *net;
 
-	struct hlist_head hash[0];	/* hashtable itself */
+	struct hlist_head hash[];	/* hashtable itself */
 };
 
 static int
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 225a7ab6d79a..19bef176145e 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -71,7 +71,7 @@ struct recent_entry {
 	u_int8_t	ttl;
 	u_int8_t	index;
 	u_int16_t	nstamps;
-	unsigned long	stamps[0];
+	unsigned long	stamps[];
 };
 
 struct recent_table {
@@ -82,7 +82,7 @@ struct recent_table {
 	unsigned int		entries;
 	u8			nstamps_max_mask;
 	struct list_head	lru_list;
-	struct list_head	iphash[0];
+	struct list_head	iphash[];
 };
 
 struct recent_net {
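The [0]-to-[] conversions above replace the old GCC zero-length-array idiom with a C99 flexible array member, which compilers and bounds checkers (-Warray-bounds, -fsanitize=bounds) actually understand; the allocation arithmetic is unchanged. A short standalone illustration, with names that only mimic xt_recent's:

    /* Sketch: allocating a struct with a C99 flexible array member,
     * like recent_entry does with stamps[]. */
    #include <stdlib.h>

    struct entry {
    	unsigned short nstamps;
    	unsigned long stamps[];	/* flexible array member, not stamps[0] */
    };

    static struct entry *entry_alloc(unsigned short n)
    {
    	/* the same arithmetic the kernel wraps in struct_size() */
    	struct entry *e = malloc(sizeof(*e) + n * sizeof(e->stamps[0]));

    	if (e)
    		e->nstamps = n;
    	return e;
    }

    int main(void)
    {
    	struct entry *e = entry_alloc(8);

    	if (!e)
    		return 1;
    	e->stamps[7] = 42;	/* in bounds; [8] is now diagnosable */
    	free(e);
    	return 0;
    }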
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 363264ca2e09..fb6c3660fb9a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3528,9 +3528,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 	struct tc_action *act;
 	int i, j, k, err = 0;
 
-	BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_ANY != FLOW_ACTION_HW_STATS_TYPE_ANY);
-	BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_IMMEDIATE != FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE);
-	BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_DELAYED != FLOW_ACTION_HW_STATS_TYPE_DELAYED);
+	BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_ANY != FLOW_ACTION_HW_STATS_ANY);
+	BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
+	BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
 
 	if (!exts)
 		return 0;
@@ -3613,8 +3613,8 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 				entry->mangle.mask = tcf_pedit_mask(act, k);
 				entry->mangle.val = tcf_pedit_val(act, k);
 				entry->mangle.offset = tcf_pedit_offset(act, k);
-				entry = &flow_action->entries[++j];
 				entry->hw_stats_type = act->hw_stats_type;
+				entry = &flow_action->entries[++j];
 			}
 		} else if (is_tcf_csum(act)) {
 			entry->id = FLOW_ACTION_CSUM;
@@ -3665,6 +3665,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 		} else if (is_tcf_skbedit_ptype(act)) {
 			entry->id = FLOW_ACTION_PTYPE;
 			entry->ptype = tcf_skbedit_ptype(act);
+		} else if (is_tcf_skbedit_priority(act)) {
+			entry->id = FLOW_ACTION_PRIORITY;
+			entry->priority = tcf_skbedit_priority(act);
 		} else {
 			err = -EOPNOTSUPP;
 			goto err_out_locked;
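The BUILD_BUG_ON() lines pin the UAPI TCA_ACT_HW_STATS_TYPE_* values to the in-kernel FLOW_ACTION_HW_STATS_* enum so one can be assigned to the other without translation, while the pedit hunk moves the hw_stats_type assignment so it lands on the entry being filled rather than the one after the increment. The same compile-time pinning can be expressed in portable C11; the enums below are stand-ins, not the kernel's:

    /* Sketch: keeping a UAPI enum and an internal enum in lock step
     * at compile time, as the BUILD_BUG_ON() calls above do. */
    #include <assert.h>

    enum uapi_hw_stats {	/* stand-ins for TCA_ACT_HW_STATS_TYPE_* */
    	UAPI_HW_STATS_IMMEDIATE = 1,
    	UAPI_HW_STATS_DELAYED = 2,
    	UAPI_HW_STATS_ANY = 3,
    };

    enum kern_hw_stats {	/* stand-ins for FLOW_ACTION_HW_STATS_* */
    	KERN_HW_STATS_IMMEDIATE = 1,
    	KERN_HW_STATS_DELAYED = 2,
    	KERN_HW_STATS_ANY = 3,
    };

    /* the build breaks the moment the two enums drift apart */
    static_assert(UAPI_HW_STATS_IMMEDIATE == KERN_HW_STATS_IMMEDIATE, "drift");
    static_assert(UAPI_HW_STATS_DELAYED == KERN_HW_STATS_DELAYED, "drift");
    static_assert(UAPI_HW_STATS_ANY == KERN_HW_STATS_ANY, "drift");

    int main(void)
    {
    	/* values now cross the UAPI/internal boundary unchanged */
    	enum kern_hw_stats s = (enum kern_hw_stats)UAPI_HW_STATS_DELAYED;

    	return s == KERN_HW_STATS_DELAYED ? 0 : 1;
    }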
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 50794125bf02..0d99df1e764d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -618,21 +618,28 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_watchdog_init);
 
-void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
+void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
+				      u64 delta_ns)
 {
 	if (test_bit(__QDISC_STATE_DEACTIVATED,
 		     &qdisc_root_sleeping(wd->qdisc)->state))
 		return;
 
-	if (wd->last_expires == expires)
-		return;
+	if (hrtimer_is_queued(&wd->timer)) {
+		/* If timer is already set in [expires, expires + delta_ns],
+		 * do not reprogram it.
+		 */
+		if (wd->last_expires - expires <= delta_ns)
+			return;
+	}
 
 	wd->last_expires = expires;
-	hrtimer_start(&wd->timer,
-		      ns_to_ktime(expires),
-		      HRTIMER_MODE_ABS_PINNED);
+	hrtimer_start_range_ns(&wd->timer,
+			       ns_to_ktime(expires),
+			       delta_ns,
+			       HRTIMER_MODE_ABS_PINNED);
 }
-EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
+EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
 
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
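The new in-window test leans on unsigned arithmetic: wd->last_expires - expires wraps to a huge value whenever last_expires is below expires, so a single comparison covers both ends of [expires, expires + delta_ns]. A tiny standalone demonstration of the idiom:

    /* Sketch: the single-comparison range check used above.
     * (last - expires) <= delta  <=>  expires <= last <= expires + delta
     * for unsigned operands, because underflow wraps to a huge value. */
    #include <assert.h>
    #include <stdint.h>

    static int in_range(uint64_t last, uint64_t expires, uint64_t delta)
    {
    	return last - expires <= delta;
    }

    int main(void)
    {
    	assert(in_range(1100, 1000, 200));	/* inside the window    */
    	assert(!in_range(900, 1000, 200));	/* before it: underflow */
    	assert(!in_range(1300, 1000, 200));	/* past the window      */
    	return 0;
    }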
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 371ad84def3b..4c060134c736 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -121,6 +121,8 @@ struct fq_sched_data {
 	u64		stat_flows_plimit;
 	u64		stat_pkts_too_long;
 	u64		stat_allocation_errors;
+
+	u32		timer_slack; /* hrtimer slack in ns */
 	struct qdisc_watchdog watchdog;
 };
 
@@ -504,8 +506,9 @@ begin:
 		head = &q->old_flows;
 		if (!head->first) {
 			if (q->time_next_delayed_flow != ~0ULL)
-				qdisc_watchdog_schedule_ns(&q->watchdog,
-							   q->time_next_delayed_flow);
+				qdisc_watchdog_schedule_range_ns(&q->watchdog,
+							q->time_next_delayed_flow,
+							q->timer_slack);
 			return NULL;
 		}
 	}
@@ -735,6 +738,8 @@ static int fq_resize(struct Qdisc *sch, u32 log)
 }
 
 static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
+	[TCA_FQ_UNSPEC]			= { .strict_start_type = TCA_FQ_TIMER_SLACK },
+
 	[TCA_FQ_PLIMIT]			= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_PLIMIT]		= { .type = NLA_U32 },
 	[TCA_FQ_QUANTUM]		= { .type = NLA_U32 },
@@ -747,6 +752,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 	[TCA_FQ_ORPHAN_MASK]		= { .type = NLA_U32 },
 	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
 	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
+	[TCA_FQ_TIMER_SLACK]		= { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -833,6 +839,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 		q->ce_threshold = (u64)NSEC_PER_USEC *
 				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
 
+	if (tb[TCA_FQ_TIMER_SLACK])
+		q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+
 	if (!err) {
 		sch_tree_unlock(sch);
 		err = fq_resize(sch, fq_log);
@@ -884,6 +893,8 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
 	q->orphan_mask		= 1024 - 1;
 	q->low_rate_threshold	= 550000 / 8;
 
+	q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
+
 	/* Default ce_threshold of 4294 seconds */
 	q->ce_threshold		= (u64)NSEC_PER_USEC * ~0U;
 
@@ -924,7 +935,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
 			q->low_rate_threshold) ||
 	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
-	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
+	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
+	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
@@ -947,7 +959,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.flows_plimit		  = q->stat_flows_plimit;
 	st.pkts_too_long	  = q->stat_pkts_too_long;
 	st.allocation_errors	  = q->stat_allocation_errors;
-	st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+	st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack -
+				    ktime_get_ns();
 	st.flows		  = q->flows;
 	st.inactive_flows	  = q->inactive_flows;
 	st.throttled_flows	  = q->throttled_flows;
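fq now schedules its throttle watchdog with a configurable slack (10 us by default), letting the hrtimer core coalesce nearby wakeups instead of firing at an exact instant. The per-thread timer slack that prctl() exposes is the userspace counterpart of the same trade-off; a brief sketch:

    /* Sketch: the userspace analogue of fq's timer_slack, the
     * per-thread slack the kernel uses to coalesce timer wakeups. */
    #include <stdio.h>
    #include <sys/prctl.h>

    int main(void)
    {
    	/* allow this thread's timers to fire up to 10us late,
    	 * mirroring fq's 10 * NSEC_PER_USEC default */
    	if (prctl(PR_SET_TIMERSLACK, 10000UL, 0, 0, 0))
    		perror("prctl");

    	printf("timer slack now %d ns\n",
    	       prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));
    	return 0;
    }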
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index a0795227216e..efd798a85931 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -8,9 +8,9 @@ tc_flower_get_target()
 	# The driver associates a counter with each tc filter, which means the
 	# number of supported filters is bounded by the number of available
 	# counters.
-	# Currently, the driver supports 12K (12,288) flow counters and six of
+	# Currently, the driver supports 30K (30,720) flow counters and six of
 	# these are used for multicast routing.
-	local target=12282
+	local target=30714
 
 	if ((! should_fail)); then
 		echo $target
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
new file mode 100755
index 000000000000..20ed98fe5a60
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	default_hw_stats_test
+	immediate_hw_stats_test
+	delayed_hw_stats_test
+	disabled_hw_stats_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+hw_stats_test()
+{
+	RET=0
+
+	local name=$1
+	local action_hw_stats=$2
+	local occ_delta=$3
+	local expected_packet_count=$4
+
+	local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats
+	check_err $? "Failed to add rule with $name hw_stats"
+
+	local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+	local expected_occ=$((orig_occ + occ_delta))
+	[ "$new_occ" == "$expected_occ" ]
+	check_err $? "Expected occupancy of $expected_occ, got $new_occ"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count
+	check_err $? "Did not match incoming packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "$name hw_stats"
+}
+
+default_hw_stats_test()
+{
+	hw_stats_test "default" "" 2 1
+}
+
+immediate_hw_stats_test()
+{
+	hw_stats_test "immediate" "hw_stats immediate" 2 1
+}
+
+delayed_hw_stats_test()
+{
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed
+	check_fail $? "Unexpected success in adding rule with delayed hw_stats"
+
+	log_test "delayed hw_stats"
+}
+
+disabled_hw_stats_test()
+{
+	hw_stats_test "disabled" "hw_stats disabled" 0 0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	h1mac=$(mac_get $h1)
+	swp1mac=$(mac_get $swp1)
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+check_tc_action_hw_stats_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
"Unexpected success in adding rule with delayed hw_stats" + + log_test "delayed hw_stats" +} + +disabled_hw_stats_test() +{ + hw_stats_test "disabled" "hw_stats disabled" 0 0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + swp1mac=$(mac_get $swp1) + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +check_tc_action_hw_stats_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index a4a7879b3bb9..977fc2b326a2 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -60,6 +60,15 @@ check_tc_chain_support() fi } +check_tc_action_hw_stats_support() +{ + tc actions help 2>&1 | grep -q hw_stats + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing action hw_stats support" + exit 1 + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit 0 diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh new file mode 100755 index 000000000000..0e7693297765 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by an action skbedit priority. The +# new priority should be taken into account when classifying traffic on the PRIO +# qdisc at $swp2. The test verifies that for different priority values, the +# traffic ends up in expected PRIO band. 
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.2/28  |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+  |
+# |  | + $swp1                       BR                           $swp2 +  |  |
+# |  |                                                              PRIO   |  |
+# |  +--------------------------------------------------------------------+  |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_ingress
+	test_egress
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+switch_create()
+{
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+	tc qdisc add dev $swp2 root handle 10: \
+	   prio bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 root
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+test_skbedit_priority_one()
+{
+	local locus=$1; shift
+	local prio=$1; shift
+	local classid=$1; shift
+
+	RET=0
+
+	tc filter add $locus handle 101 pref 1 \
+	   flower action skbedit priority $prio
+
+	local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets)
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \
+	    -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q
+	local pkt1
+	pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \
+			qdisc_parent_stats_get $swp2 $classid .packets)
+
+	check_err $? "Expected to get 10 packets on class $classid, but got
+$((pkt1 - pkt0))."
+	log_test "$locus skbedit priority $prio -> classid $classid"
+
+	tc filter del $locus pref 1
+}
+
+test_ingress()
+{
+	local prio
+
+	for prio in {0..7}; do
+		test_skbedit_priority_one "dev $swp1 ingress" \
+					  $prio 10:$((8 - prio))
+	done
+}
+
+test_egress()
+{
+	local prio
+
+	for prio in {0..7}; do
+		test_skbedit_priority_one "dev $swp2 egress" \
+					  $prio 10:$((8 - prio))
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS