From f2c750fedd2e102b95a3b1008703b9f54bbc9b08 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 10 Jun 2016 23:00:56 +0200 Subject: batman-adv: Use rtnl link in device creation example The standard kernel API to add new virtual interfaces and attach other interfaces to it is rtnl-link. batman-adv supports it since v3.10. This functionality should be used instead of the legacy batman-adv-only sysfs interface. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich --- Documentation/networking/batman-adv.txt | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index 1b5e7a7f2185..8a8d3d96f6c6 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -43,10 +43,15 @@ new interfaces to verify the compatibility. There is no need to reload the module if you plug your USB wifi adapter into your ma- chine after batman advanced was initially loaded. -To activate a given interface simply write "bat0" into its -"mesh_iface" file inside the batman_adv subfolder: +The batman-adv soft-interface can be created using the iproute2 +tool "ip" -# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface +# ip link add name bat0 type batadv + +To activate a given interface simply attach it to the "bat0" +interface + +# ip link set dev eth0 master bat0 Repeat this step for all interfaces you wish to add. Now batman starts using/broadcasting on this/these interface(s). @@ -56,10 +61,10 @@ By reading the "iface_status" file you can check its status: # cat /sys/class/net/eth0/batman_adv/iface_status # active -To deactivate an interface you have to write "none" into its -"mesh_iface" file: +To deactivate an interface you have to detach it from the +"bat0" interface: -# echo none > /sys/class/net/eth0/batman_adv/mesh_iface +# ip link set dev eth0 nomaster All mesh wide settings can be found in batman's own interface -- cgit v1.2.3 From 71e11aff34510e4133965d63ae6d91a0cf4aa876 Mon Sep 17 00:00:00 2001 From: Appana Durga Kedareswara Rao Date: Wed, 10 Aug 2016 11:20:07 +0530 Subject: Documentation: DT: net: Add Xilinx gmiitorgmii converter device tree binding documentation Device-tree binding documentation for xilinx gmiitorgmii converter. Signed-off-by: Kedareswara rao Appana Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/xilinx_gmii2rgmii.txt | 35 ++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt new file mode 100644 index 000000000000..038dda48b8e6 --- /dev/null +++ b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt @@ -0,0 +1,35 @@ +XILINX GMIITORGMII Converter Driver Device Tree Bindings +-------------------------------------------------------- + +The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media +Independent Interface (RGMII) core provides the RGMII between RGMII-compliant +Ethernet physical media devices (PHY) and the Gigabit Ethernet controller. +This core can be used in all three modes of operation(10/100/1000 Mb/s). +The Management Data Input/Output (MDIO) interface is used to configure the +Speed of operation. This core can switch dynamically between the three +Different speed modes by configuring the conveter register through mdio write. + +This converter sits between the ethernet MAC and the external phy. +MAC <==> GMII2RGMII <==> RGMII_PHY + +For more details about mdio please refer phy.txt file in the same directory. + +Required properties: +- compatible : Should be "xlnx,gmii-to-rgmii-1.0" +- reg : The ID number for the phy, usually a small integer +- phy-handle : Should point to the external phy device. + See ethernet.txt file in the same directory. + +Example: + mdio { + #address-cells = <1>; + #size-cells = <0>; + phy: ethernet-phy@0 { + ...... + }; + gmiitorgmii: gmiitorgmii@8 { + compatible = "xlnx,gmii-to-rgmii-1.0"; + reg = <8>; + phy-handle = <&phy>; + }; + }; -- cgit v1.2.3 From 1738cd3ed342294360d6a74d4e58800004bff854 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 10 Aug 2016 14:03:22 +0300 Subject: net: ena: Add a driver for Amazon Elastic Network Adapters (ENA) This is a driver for the ENA family of networking devices. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 + Documentation/networking/ena.txt | 305 ++ MAINTAINERS | 9 + drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/amazon/Kconfig | 27 + drivers/net/ethernet/amazon/Makefile | 5 + drivers/net/ethernet/amazon/ena/Makefile | 7 + drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 973 ++++++ drivers/net/ethernet/amazon/ena/ena_com.c | 2666 +++++++++++++++++ drivers/net/ethernet/amazon/ena/ena_com.h | 1038 +++++++ drivers/net/ethernet/amazon/ena/ena_common_defs.h | 48 + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 501 ++++ drivers/net/ethernet/amazon/ena/ena_eth_com.h | 160 + drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h | 416 +++ drivers/net/ethernet/amazon/ena/ena_ethtool.c | 895 ++++++ drivers/net/ethernet/amazon/ena/ena_netdev.c | 3280 +++++++++++++++++++++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 324 ++ drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h | 67 + drivers/net/ethernet/amazon/ena/ena_regs_defs.h | 133 + 20 files changed, 10858 insertions(+) create mode 100644 Documentation/networking/ena.txt create mode 100644 drivers/net/ethernet/amazon/Kconfig create mode 100644 drivers/net/ethernet/amazon/Makefile create mode 100644 drivers/net/ethernet/amazon/ena/Makefile create mode 100644 drivers/net/ethernet/amazon/ena/ena_admin_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_com.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_com.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_common_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_com.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_com.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_ethtool.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_netdev.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_netdev.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_regs_defs.h (limited to 'Documentation') diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 415154a487d0..a7697783ac4c 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -74,6 +74,8 @@ dns_resolver.txt - The DNS resolver module allows kernel servies to make DNS queries. driver.txt - Softnet driver issues. +ena.txt + - info on Amazon's Elastic Network Adapter (ENA) e100.txt - info on Intel's EtherExpress PRO/100 line of 10/100 boards e1000.txt diff --git a/Documentation/networking/ena.txt b/Documentation/networking/ena.txt new file mode 100644 index 000000000000..2b4b6f57e549 --- /dev/null +++ b/Documentation/networking/ena.txt @@ -0,0 +1,305 @@ +Linux kernel driver for Elastic Network Adapter (ENA) family: +============================================================= + +Overview: +========= +ENA is a networking interface designed to make good use of modern CPU +features and system architectures. + +The ENA device exposes a lightweight management interface with a +minimal set of memory mapped registers and extendable command set +through an Admin Queue. + +The driver supports a range of ENA devices, is link-speed independent +(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has +a negotiated and extendable feature set. + +Some ENA devices support SR-IOV. This driver is used for both the +SR-IOV Physical Function (PF) and Virtual Function (VF) devices. + +ENA devices enable high speed and low overhead network traffic +processing by providing multiple Tx/Rx queue pairs (the maximum number +is advertised by the device via the Admin Queue), a dedicated MSI-X +interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation, +and CPU cacheline optimized data placement. + +The ENA driver supports industry standard TCP/IP offload features such +as checksum offload and TCP transmit segmentation offload (TSO). +Receive-side scaling (RSS) is supported for multi-core scaling. + +The ENA driver and its corresponding devices implement health +monitoring mechanisms such as watchdog, enabling the device and driver +to recover in a manner transparent to the application, as well as +debug logs. + +Some of the ENA devices support a working mode called Low-latency +Queue (LLQ), which saves several more microseconds. + +Supported PCI vendor ID/device IDs: +=================================== +1d0f:0ec2 - ENA PF +1d0f:1ec2 - ENA PF with LLQ support +1d0f:ec20 - ENA VF +1d0f:ec21 - ENA VF with LLQ support + +ENA Source Code Directory Structure: +==================================== +ena_com.[ch] - Management communication layer. This layer is + responsible for the handling all the management + (admin) communication between the device and the + driver. +ena_eth_com.[ch] - Tx/Rx data path. +ena_admin_defs.h - Definition of ENA management interface. +ena_eth_io_defs.h - Definition of ENA data path interface. +ena_common_defs.h - Common definitions for ena_com layer. +ena_regs_defs.h - Definition of ENA PCI memory-mapped (MMIO) registers. +ena_netdev.[ch] - Main Linux kernel driver. +ena_syfsfs.[ch] - Sysfs files. +ena_ethtool.c - ethtool callbacks. +ena_pci_id_tbl.h - Supported device IDs. + +Management Interface: +===================== +ENA management interface is exposed by means of: +- PCIe Configuration Space +- Device Registers +- Admin Queue (AQ) and Admin Completion Queue (ACQ) +- Asynchronous Event Notification Queue (AENQ) + +ENA device MMIO Registers are accessed only during driver +initialization and are not involved in further normal device +operation. + +AQ is used for submitting management commands, and the +results/responses are reported asynchronously through ACQ. + +ENA introduces a very small set of management commands with room for +vendor-specific extensions. Most of the management operations are +framed in a generic Get/Set feature command. + +The following admin queue commands are supported: +- Create I/O submission queue +- Create I/O completion queue +- Destroy I/O submission queue +- Destroy I/O completion queue +- Get feature +- Set feature +- Configure AENQ +- Get statistics + +Refer to ena_admin_defs.h for the list of supported Get/Set Feature +properties. + +The Asynchronous Event Notification Queue (AENQ) is a uni-directional +queue used by the ENA device to send to the driver events that cannot +be reported using ACQ. AENQ events are subdivided into groups. Each +group may have multiple syndromes, as shown below + +The events are: + Group Syndrome + Link state change - X - + Fatal error - X - + Notification Suspend traffic + Notification Resume traffic + Keep-Alive - X - + +ACQ and AENQ share the same MSI-X vector. + +Keep-Alive is a special mechanism that allows monitoring of the +device's health. The driver maintains a watchdog (WD) handler which, +if fired, logs the current state and statistics then resets and +restarts the ENA device and driver. A Keep-Alive event is delivered by +the device every second. The driver re-arms the WD upon reception of a +Keep-Alive event. A missed Keep-Alive event causes the WD handler to +fire. + +Data Path Interface: +==================== +I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx +SQ correspondingly). Each SQ has a completion queue (CQ) associated +with it. + +The SQs and CQs are implemented as descriptor rings in contiguous +physical memory. + +The ENA driver supports two Queue Operation modes for Tx SQs: +- Regular mode + * In this mode the Tx SQs reside in the host's memory. The ENA + device fetches the ENA Tx descriptors and packet data from host + memory. +- Low Latency Queue (LLQ) mode or "push-mode". + * In this mode the driver pushes the transmit descriptors and the + first 128 bytes of the packet directly to the ENA device memory + space. The rest of the packet payload is fetched by the + device. For this operation mode, the driver uses a dedicated PCI + device memory BAR, which is mapped with write-combine capability. + +The Rx SQs support only the regular mode. + +Note: Not all ENA devices support LLQ, and this feature is negotiated + with the device upon initialization. If the ENA device does not + support LLQ mode, the driver falls back to the regular mode. + +The driver supports multi-queue for both Tx and Rx. This has various +benefits: +- Reduced CPU/thread/process contention on a given Ethernet interface. +- Cache miss rate on completion is reduced, particularly for data + cache lines that hold the sk_buff structures. +- Increased process-level parallelism when handling received packets. +- Increased data cache hit rate, by steering kernel processing of + packets to the CPU, where the application thread consuming the + packet is running. +- In hardware interrupt re-direction. + +Interrupt Modes: +================ +The driver assigns a single MSI-X vector per queue pair (for both Tx +and Rx directions). The driver assigns an additional dedicated MSI-X vector +for management (for ACQ and AENQ). + +Management interrupt registration is performed when the Linux kernel +probes the adapter, and it is de-registered when the adapter is +removed. I/O queue interrupt registration is performed when the Linux +interface of the adapter is opened, and it is de-registered when the +interface is closed. + +The management interrupt is named: + ena-mgmnt@pci: +and for each queue pair, an interrupt is named: + -Tx-Rx- + +The ENA device operates in auto-mask and auto-clear interrupt +modes. That is, once MSI-X is delivered to the host, its Cause bit is +automatically cleared and the interrupt is masked. The interrupt is +unmasked by the driver after NAPI processing is complete. + +Interrupt Moderation: +===================== +ENA driver and device can operate in conventional or adaptive interrupt +moderation mode. + +In conventional mode the driver instructs device to postpone interrupt +posting according to static interrupt delay value. The interrupt delay +value can be configured through ethtool(8). The following ethtool +parameters are supported by the driver: tx-usecs, rx-usecs + +In adaptive interrupt moderation mode the interrupt delay value is +updated by the driver dynamically and adjusted every NAPI cycle +according to the traffic nature. + +By default ENA driver applies adaptive coalescing on Rx traffic and +conventional coalescing on Tx traffic. + +Adaptive coalescing can be switched on/off through ethtool(8) +adaptive_rx on|off parameter. + +The driver chooses interrupt delay value according to the number of +bytes and packets received between interrupt unmasking and interrupt +posting. The driver uses interrupt delay table that subdivides the +range of received bytes/packets into 5 levels and assigns interrupt +delay value to each level. + +The user can enable/disable adaptive moderation, modify the interrupt +delay table and restore its default values through sysfs. + +The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK +and can be configured by the ETHTOOL_STUNABLE command of the +SIOCETHTOOL ioctl. + +SKB: +The driver-allocated SKB for frames received from Rx handling using +NAPI context. The allocation method depends on the size of the packet. +If the frame length is larger than rx_copybreak, napi_get_frags() +is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer +content is copied (by CPU) to the SKB, and the buffer is recycled. + +Statistics: +=========== +The user can obtain ENA device and driver statistics using ethtool. +The driver can collect regular or extended statistics (including +per-queue stats) from the device. + +In addition the driver logs the stats to syslog upon device reset. + +MTU: +==== +The driver supports an arbitrarily large MTU with a maximum that is +negotiated with the device. The driver configures MTU using the +SetFeature command (ENA_ADMIN_MTU property). The user can change MTU +via ip(8) and similar legacy tools. + +Stateless Offloads: +=================== +The ENA driver supports: +- TSO over IPv4/IPv6 +- TSO with ECN +- IPv4 header checksum offload +- TCP/UDP over IPv4/IPv6 checksum offloads + +RSS: +==== +- The ENA device supports RSS that allows flexible Rx traffic + steering. +- Toeplitz and CRC32 hash functions are supported. +- Different combinations of L2/L3/L4 fields can be configured as + inputs for hash functions. +- The driver configures RSS settings using the AQ SetFeature command + (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties). +- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash + function delivered in the Rx CQ descriptor is set in the received + SKB. +- The user can provide a hash key, hash function, and configure the + indirection table through ethtool(8). + +DATA PATH: +========== +Tx: +--- +end_start_xmit() is called by the stack. This function does the following: +- Maps data buffers (skb->data and frags). +- Populates ena_buf for the push buffer (if the driver and device are + in push mode.) +- Prepares ENA bufs for the remaining frags. +- Allocates a new request ID from the empty req_id ring. The request + ID is the index of the packet in the Tx info. This is used for + out-of-order TX completions. +- Adds the packet to the proper place in the Tx ring. +- Calls ena_com_prepare_tx(), an ENA communication layer that converts + the ena_bufs to ENA descriptors (and adds meta ENA descriptors as + needed.) + * This function also copies the ENA descriptors and the push buffer + to the Device memory space (if in push mode.) +- Writes doorbell to the ENA device. +- When the ENA device finishes sending the packet, a completion + interrupt is raised. +- The interrupt handler schedules NAPI. +- The ena_clean_tx_irq() function is called. This function handles the + completion descriptors generated by the ENA, with a single + completion descriptor per completed packet. + * req_id is retrieved from the completion descriptor. The tx_info of + the packet is retrieved via the req_id. The data buffers are + unmapped and req_id is returned to the empty req_id ring. + * The function stops when the completion descriptors are completed or + the budget is reached. + +Rx: +--- +- When a packet is received from the ENA device. +- The interrupt handler schedules NAPI. +- The ena_clean_rx_irq() function is called. This function calls + ena_rx_pkt(), an ENA communication layer function, which returns the + number of descriptors used for a new unhandled packet, and zero if + no new packet is found. +- Then it calls the ena_clean_rx_irq() function. +- ena_eth_rx_skb() checks packet length: + * If the packet is small (len < rx_copybreak), the driver allocates + a SKB for the new packet, and copies the packet payload into the + SKB data buffer. + - In this way the original data buffer is not passed to the stack + and is reused for future Rx packets. + * Otherwise the function unmaps the Rx buffer, then allocates the + new SKB structure and hooks the Rx buffer to the SKB frags. +- The new SKB is updated with the necessary information (protocol, + checksum hw verify result, etc.), and then passed to the network + stack, using the NAPI interface function napi_gro_receive(). diff --git a/MAINTAINERS b/MAINTAINERS index 429fc61bee81..50ce33dc62f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -636,6 +636,15 @@ F: drivers/tty/serial/altera_jtaguart.c F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h +AMAZON ETHERNET DRIVERS +M: Netanel Belgazal +R: Saeed Bishara +R: Zorik Machulsky +L: netdev@vger.kernel.org +S: Supported +F: Documentation/networking/ena.txt +F: drivers/net/ethernet/amazon/ + AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER M: Tom Lendacky M: Gary Hook diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 2ffd63463299..8cc7467b6c1f 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -24,6 +24,7 @@ source "drivers/net/ethernet/agere/Kconfig" source "drivers/net/ethernet/allwinner/Kconfig" source "drivers/net/ethernet/alteon/Kconfig" source "drivers/net/ethernet/altera/Kconfig" +source "drivers/net/ethernet/amazon/Kconfig" source "drivers/net/ethernet/amd/Kconfig" source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 1d349e9aa9a6..a09423df83f2 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/ obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/ obj-$(CONFIG_ALTERA_TSE) += altera/ +obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/ obj-$(CONFIG_NET_VENDOR_AMD) += amd/ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig new file mode 100644 index 000000000000..99b30353541a --- /dev/null +++ b/drivers/net/ethernet/amazon/Kconfig @@ -0,0 +1,27 @@ +# +# Amazon network device configuration +# + +config NET_VENDOR_AMAZON + bool "Amazon Devices" + default y + ---help--- + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Amazon devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_AMAZON + +config ENA_ETHERNET + tristate "Elastic Network Adapter (ENA) support" + depends on (PCI_MSI && X86) + ---help--- + This driver supports Elastic Network Adapter (ENA)" + + To compile this driver as a module, choose M here. + The module will be called ena. + +endif #NET_VENDOR_AMAZON diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile new file mode 100644 index 000000000000..8e0b73f60d51 --- /dev/null +++ b/drivers/net/ethernet/amazon/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Amazon network device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena/ diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile new file mode 100644 index 000000000000..eaeeae06c5d9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Elastic Network Adapter (ENA) device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena.o + +ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h new file mode 100644 index 000000000000..a46e749bf226 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -0,0 +1,973 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ADMIN_H_ +#define _ENA_ADMIN_H_ + +enum ena_admin_aq_opcode { + ENA_ADMIN_CREATE_SQ = 1, + + ENA_ADMIN_DESTROY_SQ = 2, + + ENA_ADMIN_CREATE_CQ = 3, + + ENA_ADMIN_DESTROY_CQ = 4, + + ENA_ADMIN_GET_FEATURE = 8, + + ENA_ADMIN_SET_FEATURE = 9, + + ENA_ADMIN_GET_STATS = 11, +}; + +enum ena_admin_aq_completion_status { + ENA_ADMIN_SUCCESS = 0, + + ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, + + ENA_ADMIN_BAD_OPCODE = 2, + + ENA_ADMIN_UNSUPPORTED_OPCODE = 3, + + ENA_ADMIN_MALFORMED_REQUEST = 4, + + /* Additional status is provided in ACQ entry extended_status */ + ENA_ADMIN_ILLEGAL_PARAMETER = 5, + + ENA_ADMIN_UNKNOWN_ERROR = 6, +}; + +enum ena_admin_aq_feature_id { + ENA_ADMIN_DEVICE_ATTRIBUTES = 1, + + ENA_ADMIN_MAX_QUEUES_NUM = 2, + + ENA_ADMIN_RSS_HASH_FUNCTION = 10, + + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, + + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, + + ENA_ADMIN_MTU = 14, + + ENA_ADMIN_RSS_HASH_INPUT = 18, + + ENA_ADMIN_INTERRUPT_MODERATION = 20, + + ENA_ADMIN_AENQ_CONFIG = 26, + + ENA_ADMIN_LINK_CONFIG = 27, + + ENA_ADMIN_HOST_ATTR_CONFIG = 28, + + ENA_ADMIN_FEATURES_OPCODE_NUM = 32, +}; + +enum ena_admin_placement_policy_type { + /* descriptors and headers are in host memory */ + ENA_ADMIN_PLACEMENT_POLICY_HOST = 1, + + /* descriptors and headers are in device memory (a.k.a Low Latency + * Queue) + */ + ENA_ADMIN_PLACEMENT_POLICY_DEV = 3, +}; + +enum ena_admin_link_types { + ENA_ADMIN_LINK_SPEED_1G = 0x1, + + ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2, + + ENA_ADMIN_LINK_SPEED_5G = 0x4, + + ENA_ADMIN_LINK_SPEED_10G = 0x8, + + ENA_ADMIN_LINK_SPEED_25G = 0x10, + + ENA_ADMIN_LINK_SPEED_40G = 0x20, + + ENA_ADMIN_LINK_SPEED_50G = 0x40, + + ENA_ADMIN_LINK_SPEED_100G = 0x80, + + ENA_ADMIN_LINK_SPEED_200G = 0x100, + + ENA_ADMIN_LINK_SPEED_400G = 0x200, +}; + +enum ena_admin_completion_policy_type { + /* completion queue entry for each sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC = 0, + + /* completion queue entry upon request in sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1, + + /* current queue head pointer is updated in OS memory upon sq + * descriptor request + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2, + + /* current queue head pointer is updated in OS memory for each sq + * descriptor + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD = 3, +}; + +/* basic stats return ena_admin_basic_stats while extanded stats return a + * buffer (string format) with additional statistics per queue and per + * device id + */ +enum ena_admin_get_stats_type { + ENA_ADMIN_GET_STATS_TYPE_BASIC = 0, + + ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, +}; + +enum ena_admin_get_stats_scope { + ENA_ADMIN_SPECIFIC_QUEUE = 0, + + ENA_ADMIN_ETH_TRAFFIC = 1, +}; + +struct ena_admin_aq_common_desc { + /* 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command_id; + + /* as appears in ena_admin_aq_opcode */ + u8 opcode; + + /* 0 : phase + * 1 : ctrl_data - control buffer address valid + * 2 : ctrl_data_indirect - control buffer address + * points to list of pages with addresses of control + * buffers + * 7:3 : reserved3 + */ + u8 flags; +}; + +/* used in ena_admin_aq_entry. Can point directly to control data, or to a + * page list chunk. Used also at the end of indirect mode page list chunks, + * for chaining. + */ +struct ena_admin_ctrl_buff_info { + u32 length; + + struct ena_common_mem_addr address; +}; + +struct ena_admin_sq { + u16 sq_idx; + + /* 4:0 : reserved + * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved1; +}; + +struct ena_admin_aq_entry { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + u32 inline_data_w4[12]; +}; + +struct ena_admin_acq_common_desc { + /* command identifier to associate it with the aq descriptor + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command; + + u8 status; + + /* 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 extended_status; + + /* serves as a hint what AQ entries can be revoked */ + u16 sq_head_indx; +}; + +struct ena_admin_acq_entry { + struct ena_admin_acq_common_desc acq_common_descriptor; + + u32 response_specific_data[14]; +}; + +struct ena_admin_aq_create_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved0_w1 + * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved8_w1; + + /* 3:0 : placement_policy - Describing where the SQ + * descriptor ring and the SQ packet headers reside: + * 0x1 - descriptors and headers are in OS memory, + * 0x3 - descriptors and headers in device memory + * (a.k.a Low Latency Queue) + * 6:4 : completion_policy - Describing what policy + * to use for generation completion entry (cqe) in + * the CQ associated with this SQ: 0x0 - cqe for each + * sq descriptor, 0x1 - cqe upon request in sq + * descriptor, 0x2 - current queue head pointer is + * updated in OS memory upon sq descriptor request + * 0x3 - current queue head pointer is updated in OS + * memory for each sq descriptor + * 7 : reserved15_w1 + */ + u8 sq_caps_2; + + /* 0 : is_physically_contiguous - Described if the + * queue ring memory is allocated in physical + * contiguous pages or split. + * 7:1 : reserved17_w1 + */ + u8 sq_caps_3; + + /* associated completion queue id. This CQ must be created prior to + * SQ creation + */ + u16 cq_idx; + + /* submission queue depth in entries */ + u16 sq_depth; + + /* SQ physical base address in OS memory. This field should not be + * used for Low Latency queues. Has to be page aligned. + */ + struct ena_common_mem_addr sq_ba; + + /* specifies queue head writeback location in OS memory. Valid if + * completion_policy is set to completion_policy_head_on_demand or + * completion_policy_head. Has to be cache aligned + */ + struct ena_common_mem_addr sq_head_writeback; + + u32 reserved0_w7; + + u32 reserved0_w8; +}; + +enum ena_admin_sq_direction { + ENA_ADMIN_SQ_DIRECTION_TX = 1, + + ENA_ADMIN_SQ_DIRECTION_RX = 2, +}; + +struct ena_admin_acq_create_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 sq_idx; + + u16 reserved; + + /* queue doorbell address as an offset to PCIe MMIO REG BAR */ + u32 sq_doorbell_offset; + + /* low latency queue ring base address as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_descriptors_offset; + + /* low latency queue headers' memory as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_headers_offset; +}; + +struct ena_admin_aq_destroy_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_sq sq; +}; + +struct ena_admin_acq_destroy_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +struct ena_admin_aq_create_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved5 + * 5 : interrupt_mode_enabled - if set, cq operates + * in interrupt mode, otherwise - polling + * 7:6 : reserved6 + */ + u8 cq_caps_1; + + /* 4:0 : cq_entry_size_words - size of CQ entry in + * 32-bit words, valid values: 4, 8. + * 7:5 : reserved7 + */ + u8 cq_caps_2; + + /* completion queue depth in # of entries. must be power of 2 */ + u16 cq_depth; + + /* msix vector assigned to this cq */ + u32 msix_vector; + + /* cq physical base address in OS memory. CQ must be physically + * contiguous + */ + struct ena_common_mem_addr cq_ba; +}; + +struct ena_admin_acq_create_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 cq_idx; + + /* actual cq depth in number of entries */ + u16 cq_actual_depth; + + u32 numa_node_register_offset; + + u32 cq_head_db_register_offset; + + u32 cq_interrupt_unmask_register_offset; +}; + +struct ena_admin_aq_destroy_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + u16 cq_idx; + + u16 reserved1; +}; + +struct ena_admin_acq_destroy_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +/* ENA AQ Get Statistics command. Extended statistics are placed in control + * buffer pointed by AQ entry + */ +struct ena_admin_aq_get_stats_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + /* command specific inline data */ + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + /* stats type as defined in enum ena_admin_get_stats_type */ + u8 type; + + /* stats scope defined in enum ena_admin_get_stats_scope */ + u8 scope; + + u16 reserved3; + + /* queue id. used when scope is specific_queue */ + u16 queue_idx; + + /* device id, value 0xFFFF means mine. only privileged device can get + * stats of other device + */ + u16 device_id; +}; + +/* Basic Statistics Command. */ +struct ena_admin_basic_stats { + u32 tx_bytes_low; + + u32 tx_bytes_high; + + u32 tx_pkts_low; + + u32 tx_pkts_high; + + u32 rx_bytes_low; + + u32 rx_bytes_high; + + u32 rx_pkts_low; + + u32 rx_pkts_high; + + u32 rx_drops_low; + + u32 rx_drops_high; +}; + +struct ena_admin_acq_get_stats_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + struct ena_admin_basic_stats basic_stats; +}; + +struct ena_admin_get_set_feature_common_desc { + /* 1:0 : select - 0x1 - current value; 0x3 - default + * value + * 7:3 : reserved3 + */ + u8 flags; + + /* as appears in ena_admin_aq_feature_id */ + u8 feature_id; + + u16 reserved16; +}; + +struct ena_admin_device_attr_feature_desc { + u32 impl_id; + + u32 device_version; + + /* bitmap of ena_admin_aq_feature_id */ + u32 supported_features; + + u32 reserved3; + + /* Indicates how many bits are used physical address access. */ + u32 phys_addr_width; + + /* Indicates how many bits are used virtual address access. */ + u32 virt_addr_width; + + /* unicast MAC address (in Network byte order) */ + u8 mac_addr[6]; + + u8 reserved7[2]; + + u32 max_mtu; +}; + +struct ena_admin_queue_feature_desc { + /* including LLQs */ + u32 max_sq_num; + + u32 max_sq_depth; + + u32 max_cq_num; + + u32 max_cq_depth; + + u32 max_llq_num; + + u32 max_llq_depth; + + u32 max_header_size; + + /* Maximum Descriptors number, including meta descriptor, allowed for + * a single Tx packet + */ + u16 max_packet_tx_descs; + + /* Maximum Descriptors number allowed for a single Rx packet */ + u16 max_packet_rx_descs; +}; + +struct ena_admin_set_feature_mtu_desc { + /* exclude L2 */ + u32 mtu; +}; + +struct ena_admin_set_feature_host_attr_desc { + /* host OS info base address in OS memory. host info is 4KB of + * physically contiguous + */ + struct ena_common_mem_addr os_info_ba; + + /* host debug area base address in OS memory. debug area must be + * physically contiguous + */ + struct ena_common_mem_addr debug_ba; + + /* debug area size */ + u32 debug_area_size; +}; + +struct ena_admin_feature_intr_moder_desc { + /* interrupt delay granularity in usec */ + u16 intr_delay_resolution; + + u16 reserved; +}; + +struct ena_admin_get_feature_link_desc { + /* Link speed in Mb */ + u32 speed; + + /* bit field of enum ena_admin_link types */ + u32 supported; + + /* 0 : autoneg + * 1 : duplex - Full Duplex + * 31:2 : reserved2 + */ + u32 flags; +}; + +struct ena_admin_feature_aenq_desc { + /* bitmask for AENQ groups the device can report */ + u32 supported_groups; + + /* bitmask for AENQ groups to report */ + u32 enabled_groups; +}; + +struct ena_admin_feature_offload_desc { + /* 0 : TX_L3_csum_ipv4 + * 1 : TX_L4_ipv4_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 2 : TX_L4_ipv4_csum_full + * 3 : TX_L4_ipv6_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 4 : TX_L4_ipv6_csum_full + * 5 : tso_ipv4 + * 6 : tso_ipv6 + * 7 : tso_ecn + */ + u32 tx; + + /* Receive side supported stateless offload + * 0 : RX_L3_csum_ipv4 - IPv4 checksum + * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum + * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum + * 3 : RX_hash - Hash calculation + */ + u32 rx_supported; + + u32 rx_enabled; +}; + +enum ena_admin_hash_functions { + ENA_ADMIN_TOEPLITZ = 1, + + ENA_ADMIN_CRC32 = 2, +}; + +struct ena_admin_feature_rss_flow_hash_control { + u32 keys_num; + + u32 reserved; + + u32 key[10]; +}; + +struct ena_admin_feature_rss_flow_hash_function { + /* 7:0 : funcs - bitmask of ena_admin_hash_functions */ + u32 supported_func; + + /* 7:0 : selected_func - bitmask of + * ena_admin_hash_functions + */ + u32 selected_func; + + /* initial value */ + u32 init_val; +}; + +/* RSS flow hash protocols */ +enum ena_admin_flow_hash_proto { + ENA_ADMIN_RSS_TCP4 = 0, + + ENA_ADMIN_RSS_UDP4 = 1, + + ENA_ADMIN_RSS_TCP6 = 2, + + ENA_ADMIN_RSS_UDP6 = 3, + + ENA_ADMIN_RSS_IP4 = 4, + + ENA_ADMIN_RSS_IP6 = 5, + + ENA_ADMIN_RSS_IP4_FRAG = 6, + + ENA_ADMIN_RSS_NOT_IP = 7, + + ENA_ADMIN_RSS_PROTO_NUM = 16, +}; + +/* RSS flow hash fields */ +enum ena_admin_flow_hash_fields { + /* Ethernet Dest Addr */ + ENA_ADMIN_RSS_L2_DA = 0, + + /* Ethernet Src Addr */ + ENA_ADMIN_RSS_L2_SA = 1, + + /* ipv4/6 Dest Addr */ + ENA_ADMIN_RSS_L3_DA = 2, + + /* ipv4/6 Src Addr */ + ENA_ADMIN_RSS_L3_SA = 5, + + /* tcp/udp Dest Port */ + ENA_ADMIN_RSS_L4_DP = 6, + + /* tcp/udp Src Port */ + ENA_ADMIN_RSS_L4_SP = 7, +}; + +struct ena_admin_proto_input { + /* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */ + u16 fields; + + u16 reserved2; +}; + +struct ena_admin_feature_rss_hash_control { + struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM]; +}; + +struct ena_admin_feature_rss_flow_hash_input { + /* supported hash input sorting + * 1 : L3_sort - support swap L3 addresses if DA is + * smaller than SA + * 2 : L4_sort - support swap L4 ports if DP smaller + * SP + */ + u16 supported_input_sort; + + /* enabled hash input sorting + * 1 : enable_L3_sort - enable swap L3 addresses if + * DA smaller than SA + * 2 : enable_L4_sort - enable swap L4 ports if DP + * smaller than SP + */ + u16 enabled_input_sort; +}; + +enum ena_admin_os_type { + ENA_ADMIN_OS_LINUX = 1, + + ENA_ADMIN_OS_WIN = 2, + + ENA_ADMIN_OS_DPDK = 3, + + ENA_ADMIN_OS_FREEBSD = 4, + + ENA_ADMIN_OS_IPXE = 5, +}; + +struct ena_admin_host_info { + /* defined in enum ena_admin_os_type */ + u32 os_type; + + /* os distribution string format */ + u8 os_dist_str[128]; + + /* OS distribution numeric format */ + u32 os_dist; + + /* kernel version string format */ + u8 kernel_ver_str[32]; + + /* Kernel version numeric format */ + u32 kernel_ver; + + /* 7:0 : major + * 15:8 : minor + * 23:16 : sub_minor + */ + u32 driver_version; + + /* features bitmap */ + u32 supported_network_features[4]; +}; + +struct ena_admin_rss_ind_table_entry { + u16 cq_idx; + + u16 reserved; +}; + +struct ena_admin_feature_rss_ind_table { + /* min supported table size (2^min_size) */ + u16 min_size; + + /* max supported table size (2^max_size) */ + u16 max_size; + + /* table size (2^size) */ + u16 size; + + u16 reserved; + + /* index of the inline entry. 0xFFFFFFFF means invalid */ + u32 inline_index; + + /* used for updating single entry, ignored when setting the entire + * table through the control buffer. + */ + struct ena_admin_rss_ind_table_entry inline_entry; +}; + +struct ena_admin_get_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + u32 raw[11]; +}; + +struct ena_admin_get_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + + struct ena_admin_device_attr_feature_desc dev_attr; + + struct ena_admin_queue_feature_desc max_queue; + + struct ena_admin_feature_aenq_desc aenq; + + struct ena_admin_get_feature_link_desc link; + + struct ena_admin_feature_offload_desc offload; + + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + struct ena_admin_feature_rss_ind_table ind_table; + + struct ena_admin_feature_intr_moder_desc intr_moderation; + } u; +}; + +struct ena_admin_set_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + union { + u32 raw[11]; + + /* mtu size */ + struct ena_admin_set_feature_mtu_desc mtu; + + /* host attributes */ + struct ena_admin_set_feature_host_attr_desc host_attr; + + /* AENQ configuration */ + struct ena_admin_feature_aenq_desc aenq; + + /* rss flow hash function */ + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + /* rss flow hash input */ + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + /* rss indirection table */ + struct ena_admin_feature_rss_ind_table ind_table; + } u; +}; + +struct ena_admin_set_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + } u; +}; + +struct ena_admin_aenq_common_desc { + u16 group; + + u16 syndrom; + + /* 0 : phase */ + u8 flags; + + u8 reserved1[3]; + + u32 timestamp_low; + + u32 timestamp_high; +}; + +/* asynchronous event notification groups */ +enum ena_admin_aenq_group { + ENA_ADMIN_LINK_CHANGE = 0, + + ENA_ADMIN_FATAL_ERROR = 1, + + ENA_ADMIN_WARNING = 2, + + ENA_ADMIN_NOTIFICATION = 3, + + ENA_ADMIN_KEEP_ALIVE = 4, + + ENA_ADMIN_AENQ_GROUPS_NUM = 5, +}; + +enum ena_admin_aenq_notification_syndrom { + ENA_ADMIN_SUSPEND = 0, + + ENA_ADMIN_RESUME = 1, +}; + +struct ena_admin_aenq_entry { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* command specific inline data */ + u32 inline_data_w4[12]; +}; + +struct ena_admin_aenq_link_change_desc { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* 0 : link_status */ + u32 flags; +}; + +struct ena_admin_ena_mmio_req_read_less_resp { + u16 req_id; + + u16 reg_off; + + /* value is valid when poll is cleared */ + u32 reg_val; +}; + +/* aq_common_desc */ +#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) + +/* sq */ +#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5) + +/* acq_common_desc */ +#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aq_create_sq_cmd */ +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0) + +/* aq_create_cq_cmd */ +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) + +/* get_set_feature_common_desc */ +#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) + +/* get_feature_link_desc */ +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0) +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1 +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1) + +/* feature_offload_desc */ +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3) + +/* feature_rss_flow_hash_function */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0) + +/* feature_rss_flow_hash_input */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2) + +/* host_info */ +#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0) +#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8 +#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8) +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16 +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16) + +/* aenq_common_desc */ +#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aenq_link_change_desc */ +#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0) + +#endif /*_ENA_ADMIN_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c new file mode 100644 index 000000000000..3066d9c99984 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -0,0 +1,2666 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_com.h" + +/*****************************************************************************/ +/*****************************************************************************/ + +/* Timeout in micro-sec */ +#define ADMIN_CMD_TIMEOUT_US (1000000) + +#define ENA_ASYNC_QUEUE_DEPTH 4 +#define ENA_ADMIN_QUEUE_DEPTH 32 + +#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \ + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \ + | (ENA_COMMON_SPEC_VERSION_MINOR)) + +#define ENA_CTRL_MAJOR 0 +#define ENA_CTRL_MINOR 0 +#define ENA_CTRL_SUB_MINOR 1 + +#define MIN_ENA_CTRL_VER \ + (((ENA_CTRL_MAJOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ + ((ENA_CTRL_MINOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ + (ENA_CTRL_SUB_MINOR)) + +#define ENA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) +#define ENA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) + +#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF + +/*****************************************************************************/ +/*****************************************************************************/ +/*****************************************************************************/ + +enum ena_cmd_status { + ENA_CMD_SUBMITTED, + ENA_CMD_COMPLETED, + /* Abort - canceled by the driver */ + ENA_CMD_ABORTED, +}; + +struct ena_comp_ctx { + struct completion wait_event; + struct ena_admin_acq_entry *user_cqe; + u32 comp_size; + enum ena_cmd_status status; + /* status from the device */ + u8 comp_status; + u8 cmd_opcode; + bool occupied; +}; + +struct ena_com_stats_ctx { + struct ena_admin_aq_get_stats_cmd get_cmd; + struct ena_admin_acq_get_stats_resp get_resp; +}; + +static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, + struct ena_common_mem_addr *ena_addr, + dma_addr_t addr) +{ + if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) { + pr_err("dma address has more bits that the device supports\n"); + return -EINVAL; + } + + ena_addr->mem_addr_low = (u32)addr; + ena_addr->mem_addr_high = (u64)addr >> 32; + + return 0; +} + +static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_sq *sq = &queue->sq; + u16 size = ADMIN_SQ_SIZE(queue->q_depth); + + sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr, + GFP_KERNEL); + + if (!sq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + sq->head = 0; + sq->tail = 0; + sq->phase = 1; + + sq->db_addr = NULL; + + return 0; +} + +static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_cq *cq = &queue->cq; + u16 size = ADMIN_CQ_SIZE(queue->q_depth); + + cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr, + GFP_KERNEL); + + if (!cq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + cq->head = 0; + cq->phase = 1; + + return 0; +} + +static int ena_com_admin_init_aenq(struct ena_com_dev *dev, + struct ena_aenq_handlers *aenq_handlers) +{ + struct ena_com_aenq *aenq = &dev->aenq; + u32 addr_low, addr_high, aenq_caps; + u16 size; + + dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH; + size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH); + aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr, + GFP_KERNEL); + + if (!aenq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + aenq->head = aenq->q_depth; + aenq->phase = 1; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + + writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF); + + aenq_caps = 0; + aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= (sizeof(struct ena_admin_aenq_entry) + << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); + + if (unlikely(!aenq_handlers)) { + pr_err("aenq handlers pointer is NULL\n"); + return -EINVAL; + } + + aenq->aenq_handlers = aenq_handlers; + + return 0; +} + +static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, + struct ena_comp_ctx *comp_ctx) +{ + comp_ctx->occupied = false; + atomic_dec(&queue->outstanding_cmds); +} + +static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, + u16 command_id, bool capture) +{ + if (unlikely(command_id >= queue->q_depth)) { + pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", + command_id, queue->q_depth); + return NULL; + } + + if (unlikely(queue->comp_ctx[command_id].occupied && capture)) { + pr_err("Completion context is occupied\n"); + return NULL; + } + + if (capture) { + atomic_inc(&queue->outstanding_cmds); + queue->comp_ctx[command_id].occupied = true; + } + + return &queue->comp_ctx[command_id]; +} + +static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct ena_comp_ctx *comp_ctx; + u16 tail_masked, cmd_id; + u16 queue_size_mask; + u16 cnt; + + queue_size_mask = admin_queue->q_depth - 1; + + tail_masked = admin_queue->sq.tail & queue_size_mask; + + /* In case of queue FULL */ + cnt = admin_queue->sq.tail - admin_queue->sq.head; + if (cnt >= admin_queue->q_depth) { + pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n", + admin_queue->sq.tail, admin_queue->sq.head, + admin_queue->q_depth); + admin_queue->stats.out_of_space++; + return ERR_PTR(-ENOSPC); + } + + cmd_id = admin_queue->curr_cmd_id; + + cmd->aq_common_descriptor.flags |= admin_queue->sq.phase & + ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + + cmd->aq_common_descriptor.command_id |= cmd_id & + ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true); + if (unlikely(!comp_ctx)) + return ERR_PTR(-EINVAL); + + comp_ctx->status = ENA_CMD_SUBMITTED; + comp_ctx->comp_size = (u32)comp_size_in_bytes; + comp_ctx->user_cqe = comp; + comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + + reinit_completion(&comp_ctx->wait_event); + + memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes); + + admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) & + queue_size_mask; + + admin_queue->sq.tail++; + admin_queue->stats.submitted_cmd++; + + if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0)) + admin_queue->sq.phase = !admin_queue->sq.phase; + + writel(admin_queue->sq.tail, admin_queue->sq.db_addr); + + return comp_ctx; +} + +static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) +{ + size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); + struct ena_comp_ctx *comp_ctx; + u16 i; + + queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); + if (unlikely(!queue->comp_ctx)) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + for (i = 0; i < queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(queue, i, false); + if (comp_ctx) + init_completion(&comp_ctx->wait_event); + } + + return 0; +} + +static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + unsigned long flags; + struct ena_comp_ctx *comp_ctx; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + if (unlikely(!admin_queue->running_state)) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + return ERR_PTR(-ENODEV); + } + comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd, + cmd_size_in_bytes, + comp, + comp_size_in_bytes); + if (unlikely(IS_ERR(comp_ctx))) + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + return comp_ctx; +} + +static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_sq *io_sq) +{ + size_t size; + int dev_node = 0; + + memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + io_sq->desc_entry_size = + (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_desc) : + sizeof(struct ena_eth_io_rx_desc); + + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + } + } else { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + } + } + + if (!io_sq->desc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_sq->tail = 0; + io_sq->next_to_comp = 0; + io_sq->phase = 1; + + return 0; +} + +static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_cq *io_cq) +{ + size_t size; + int prev_node = 0; + + memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + /* Use the basic completion descriptor for Rx */ + io_cq->cdesc_entry_size_in_bytes = + (io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_cdesc) : + sizeof(struct ena_eth_io_rx_cdesc_base); + + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + prev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, prev_node); + if (!io_cq->cdesc_addr.virt_addr) { + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, + GFP_KERNEL); + } + + if (!io_cq->cdesc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_cq->phase = 1; + io_cq->head = 0; + + return 0; +} + +static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue, + struct ena_admin_acq_entry *cqe) +{ + struct ena_comp_ctx *comp_ctx; + u16 cmd_id; + + cmd_id = cqe->acq_common_descriptor.command & + ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false); + if (unlikely(!comp_ctx)) { + pr_err("comp_ctx is NULL. Changing the admin queue running state\n"); + admin_queue->running_state = false; + return; + } + + comp_ctx->status = ENA_CMD_COMPLETED; + comp_ctx->comp_status = cqe->acq_common_descriptor.status; + + if (comp_ctx->user_cqe) + memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size); + + if (!admin_queue->polling) + complete(&comp_ctx->wait_event); +} + +static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue) +{ + struct ena_admin_acq_entry *cqe = NULL; + u16 comp_num = 0; + u16 head_masked; + u8 phase; + + head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1); + phase = admin_queue->cq.phase; + + cqe = &admin_queue->cq.entries[head_masked]; + + /* Go over all the completions */ + while ((cqe->acq_common_descriptor.flags & + ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Do not read the rest of the completion entry before the + * phase bit was validated + */ + rmb(); + ena_com_handle_single_admin_completion(admin_queue, cqe); + + head_masked++; + comp_num++; + if (unlikely(head_masked == admin_queue->q_depth)) { + head_masked = 0; + phase = !phase; + } + + cqe = &admin_queue->cq.entries[head_masked]; + } + + admin_queue->cq.head += comp_num; + admin_queue->cq.phase = phase; + admin_queue->sq.head += comp_num; + admin_queue->stats.completed_cmd += comp_num; +} + +static int ena_com_comp_status_to_errno(u8 comp_status) +{ + if (unlikely(comp_status != 0)) + pr_err("admin command failed[%u]\n", comp_status); + + if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR)) + return -EINVAL; + + switch (comp_status) { + case ENA_ADMIN_SUCCESS: + return 0; + case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE: + return -ENOMEM; + case ENA_ADMIN_UNSUPPORTED_OPCODE: + return -EPERM; + case ENA_ADMIN_BAD_OPCODE: + case ENA_ADMIN_MALFORMED_REQUEST: + case ENA_ADMIN_ILLEGAL_PARAMETER: + case ENA_ADMIN_UNKNOWN_ERROR: + return -EINVAL; + } + + return 0; +} + +static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + u32 start_time; + int ret; + + start_time = ((u32)jiffies_to_usecs(jiffies)); + + while (comp_ctx->status == ENA_CMD_SUBMITTED) { + if ((((u32)jiffies_to_usecs(jiffies)) - start_time) > + ADMIN_CMD_TIMEOUT_US) { + pr_err("Wait for completion (polling) timeout\n"); + /* ENA didn't have any completion */ + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.no_completion++; + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + ret = -ETIME; + goto err; + } + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + msleep(100); + } + + if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { + pr_err("Command was aborted\n"); + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.aborted_cmd++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + ret = -ENODEV; + goto err; + } + + WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", + comp_ctx->status); + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + int ret; + + wait_for_completion_timeout(&comp_ctx->wait_event, + usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US)); + + /* In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors + * 1) No completion (timeout reached) + * 2) There is completion but the device didn't get any msi-x interrupt. + */ + if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) { + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + admin_queue->stats.no_completion++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + if (comp_ctx->status == ENA_CMD_COMPLETED) + pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n", + comp_ctx->cmd_opcode); + else + pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n", + comp_ctx->cmd_opcode, comp_ctx->status); + + admin_queue->running_state = false; + ret = -ETIME; + goto err; + } + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +/* This method read the hardware device register through posting writes + * and waiting for response + * On timeout the function will return ENA_MMIO_READ_TIMEOUT + */ +static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp = + mmio_read->read_resp; + u32 mmio_read_reg, ret; + unsigned long flags; + int i; + + might_sleep(); + + /* If readless is disabled, perform regular read */ + if (!mmio_read->readless_supported) + return readl(ena_dev->reg_bar + offset); + + spin_lock_irqsave(&mmio_read->lock, flags); + mmio_read->seq_num++; + + read_resp->req_id = mmio_read->seq_num + 0xDEAD; + mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & + ENA_REGS_MMIO_REG_READ_REG_OFF_MASK; + mmio_read_reg |= mmio_read->seq_num & + ENA_REGS_MMIO_REG_READ_REQ_ID_MASK; + + /* make sure read_resp->req_id get updated before the hw can write + * there + */ + wmb(); + + writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); + + for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) { + if (read_resp->req_id == mmio_read->seq_num) + break; + + udelay(1); + } + + if (unlikely(i == ENA_REG_READ_TIMEOUT)) { + pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); + ret = ENA_MMIO_READ_TIMEOUT; + goto err; + } + + if (read_resp->reg_off != offset) { + pr_err("Read failure: wrong offset provided"); + ret = ENA_MMIO_READ_TIMEOUT; + } else { + ret = read_resp->reg_val; + } +err: + spin_unlock_irqrestore(&mmio_read->lock, flags); + + return ret; +} + +/* There are two types to wait for completion. + * Polling mode - wait until the completion is available. + * Async mode - wait on wait queue until the completion is ready + * (or the timeout expired). + * It is expected that the IRQ called ena_com_handle_admin_completion + * to mark the completions. + */ +static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + if (admin_queue->polling) + return ena_com_wait_and_process_admin_cq_polling(comp_ctx, + admin_queue); + + return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx, + admin_queue); +} + +static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_sq_cmd destroy_cmd; + struct ena_admin_acq_destroy_sq_resp_desc destroy_resp; + u8 direction; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + destroy_cmd.sq.sq_identity |= (direction << + ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_SQ_SQ_DIRECTION_MASK; + + destroy_cmd.sq.sq_idx = io_sq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("failed to destroy io sq error: %d\n", ret); + + return ret; +} + +static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, + struct ena_com_io_cq *io_cq) +{ + size_t size; + + if (io_cq->cdesc_addr.virt_addr) { + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + dma_free_coherent(ena_dev->dmadev, size, + io_cq->cdesc_addr.virt_addr, + io_cq->cdesc_addr.phys_addr); + + io_cq->cdesc_addr.virt_addr = NULL; + } + + if (io_sq->desc_addr.virt_addr) { + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + dma_free_coherent(ena_dev->dmadev, size, + io_sq->desc_addr.virt_addr, + io_sq->desc_addr.phys_addr); + else + devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr); + + io_sq->desc_addr.virt_addr = NULL; + } +} + +static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, + u16 exp_state) +{ + u32 val, i; + + for (i = 0; i < timeout; i++) { + val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == + exp_state) + return 0; + + /* The resolution of the timeout is 100ms */ + msleep(100); + } + + return -ETIME; +} + +static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev, + enum ena_admin_aq_feature_id feature_id) +{ + u32 feature_mask = 1 << feature_id; + + /* Device attributes is always supported */ + if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) && + !(ena_dev->supported_features & feature_mask)) + return false; + + return true; +} + +static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_get_feat_cmd get_cmd; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) { + pr_info("Feature %d isn't supported\n", feature_id); + return -EPERM; + } + + memset(&get_cmd, 0x0, sizeof(get_cmd)); + admin_queue = &ena_dev->admin_queue; + + get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE; + + if (control_buff_size) + get_cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + else + get_cmd.aq_common_descriptor.flags = 0; + + ret = ena_com_mem_addr_set(ena_dev, + &get_cmd.control_buffer.address, + control_buf_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + get_cmd.control_buffer.length = control_buff_size; + + get_cmd.feat_common.feature_id = feature_id; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *) + &get_cmd, + sizeof(get_cmd), + (struct ena_admin_acq_entry *) + get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to submit get_feature command %d error: %d\n", + feature_id, ret); + + return ret; +} + +static int ena_com_get_feature(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id) +{ + return ena_com_get_feature_ex(ena_dev, + get_resp, + feature_id, + 0, + 0); +} + +static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_key = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + &rss->hash_key_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_key)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_key) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + rss->hash_key, rss->hash_key_dma_addr); + rss->hash_key = NULL; +} + +static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_ctrl = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + &rss->hash_ctrl_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_ctrl)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_ctrl) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + rss->hash_ctrl, rss->hash_ctrl_dma_addr); + rss->hash_ctrl = NULL; +} + +static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, + u16 log_size) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + size_t tbl_size; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + if (unlikely(ret)) + return ret; + + if ((get_resp.u.ind_table.min_size > log_size) || + (get_resp.u.ind_table.max_size < log_size)) { + pr_err("indirect table size doesn't fit. requested size: %d while min is:%d and max %d\n", + 1 << log_size, 1 << get_resp.u.ind_table.min_size, + 1 << get_resp.u.ind_table.max_size); + return -EINVAL; + } + + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rss->rss_ind_tbl = + dma_zalloc_coherent(ena_dev->dmadev, tbl_size, + &rss->rss_ind_tbl_dma_addr, GFP_KERNEL); + if (unlikely(!rss->rss_ind_tbl)) + goto mem_err1; + + tbl_size = (1ULL << log_size) * sizeof(u16); + rss->host_rss_ind_tbl = + devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); + if (unlikely(!rss->host_rss_ind_tbl)) + goto mem_err2; + + rss->tbl_log_size = log_size; + + return 0; + +mem_err2: + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; +mem_err1: + rss->tbl_log_size = 0; + return -ENOMEM; +} + +static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + size_t tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + if (rss->rss_ind_tbl) + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; + + if (rss->host_rss_ind_tbl) + devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl); + rss->host_rss_ind_tbl = NULL; +} + +static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, u16 cq_idx) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_sq_cmd create_cmd; + struct ena_admin_acq_create_sq_resp_desc cmd_completion; + u8 direction; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ; + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + create_cmd.sq_identity |= (direction << + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK; + + create_cmd.sq_caps_2 |= io_sq->mem_queue_type & + ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK; + + create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC << + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK; + + create_cmd.sq_caps_3 |= + ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK; + + create_cmd.cq_idx = cq_idx; + create_cmd.sq_depth = io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.sq_ba, + io_sq->desc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO SQ. error: %d\n", ret); + return ret; + } + + io_sq->idx = cmd_completion.sq_idx; + + io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + (uintptr_t)cmd_completion.sq_doorbell_offset); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_headers_offset); + + io_sq->desc_addr.pbuf_dev_addr = + (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_descriptors_offset); + } + + pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); + + return ret; +} + +static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_com_io_sq *io_sq; + u16 qid; + int i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + qid = rss->host_rss_ind_tbl[i]; + if (qid >= ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + io_sq = &ena_dev->io_sq_queues[qid]; + + if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX) + return -EINVAL; + + rss->rss_ind_tbl[i].cq_idx = io_sq->idx; + } + + return 0; +} + +static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev) +{ + u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 }; + struct ena_rss *rss = &ena_dev->rss; + u8 idx; + u16 i; + + for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++) + dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + idx = (u8)rss->rss_ind_tbl[i].cq_idx; + + if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx]; + } + + return 0; +} + +static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + size_t size; + + size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS; + + ena_dev->intr_moder_tbl = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + if (!ena_dev->intr_moder_tbl) + return -ENOMEM; + + ena_com_config_default_interrupt_moderation_table(ena_dev); + + return 0; +} + +static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, + u16 intr_delay_resolution) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int i; + + if (!intr_delay_resolution) { + pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n"); + intr_delay_resolution = 1; + } + ena_dev->intr_delay_resolution = intr_delay_resolution; + + /* update Rx */ + for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++) + intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution; + + /* update Tx */ + ena_dev->intr_moder_tx_interval /= intr_delay_resolution; +} + +/*****************************************************************************/ +/******************************* API ******************************/ +/*****************************************************************************/ + +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *comp, + size_t comp_size) +{ + struct ena_comp_ctx *comp_ctx; + int ret; + + comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size, + comp, comp_size); + if (unlikely(IS_ERR(comp_ctx))) { + pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx)); + return PTR_ERR(comp_ctx); + } + + ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue); + if (unlikely(ret)) { + if (admin_queue->running_state) + pr_err("Failed to process command. ret = %d\n", ret); + else + pr_debug("Failed to process command. ret = %d\n", ret); + } + return ret; +} + +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_cq_cmd create_cmd; + struct ena_admin_acq_create_cq_resp_desc cmd_completion; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ; + + create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) & + ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + create_cmd.cq_caps_1 |= + ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK; + + create_cmd.msix_vector = io_cq->msix_vector; + create_cmd.cq_depth = io_cq->q_depth; + + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.cq_ba, + io_cq->cdesc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO CQ. error: %d\n", ret); + return ret; + } + + io_cq->idx = cmd_completion.cq_idx; + + io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_interrupt_unmask_register_offset); + + if (cmd_completion.cq_head_db_register_offset) + io_cq->cq_head_db_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_head_db_register_offset); + + if (cmd_completion.numa_node_register_offset) + io_cq->numa_node_cfg_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.numa_node_register_offset); + + pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); + + return ret; +} + +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq) +{ + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Invalid queue number %d but the max is %d\n", qid, + ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + *io_sq = &ena_dev->io_sq_queues[qid]; + *io_cq = &ena_dev->io_cq_queues[qid]; + + return 0; +} + +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_comp_ctx *comp_ctx; + u16 i; + + if (!admin_queue->comp_ctx) + return; + + for (i = 0; i < admin_queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(admin_queue, i, false); + if (unlikely(!comp_ctx)) + break; + + comp_ctx->status = ENA_CMD_ABORTED; + + complete(&comp_ctx->wait_event); + } +} + +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + while (atomic_read(&admin_queue->outstanding_cmds) != 0) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + msleep(20); + spin_lock_irqsave(&admin_queue->q_lock, flags); + } + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_cq_cmd destroy_cmd; + struct ena_admin_acq_destroy_cq_resp_desc destroy_resp; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + destroy_cmd.cq_idx = io_cq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("Failed to destroy IO CQ. error: %d\n", ret); + + return ret; +} + +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev) +{ + return ena_dev->admin_queue.running_state; +} + +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_dev->admin_queue.running_state = state; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev) +{ + u16 depth = ena_dev->aenq.q_depth; + + WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n"); + + /* Init head_db to mark that all entries in the queue + * are initially available + */ + writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG); + if (ret) { + pr_info("Can't get aenq configuration\n"); + return ret; + } + + if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) { + pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n", + get_resp.u.aenq.supported_groups, groups_flag); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG; + cmd.u.aenq.enabled_groups = groups_flag; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to config AENQ ret: %d\n", ret); + + return ret; +} + +int ena_com_get_dma_width(struct ena_com_dev *ena_dev) +{ + u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + int width; + + if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> + ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + + pr_debug("ENA dma width: %d\n", width); + + if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) { + pr_err("DMA width illegal value: %d\n", width); + return -EINVAL; + } + + ena_dev->dma_addr_bits = width; + + return width; +} + +int ena_com_validate_version(struct ena_com_dev *ena_dev) +{ + u32 ver; + u32 ctrl_ver; + u32 ctrl_ver_masked; + + /* Make sure the ENA version and the controller version are at least + * as the driver expects + */ + ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF); + ctrl_ver = ena_com_reg_bar_read32(ena_dev, + ENA_REGS_CONTROLLER_VERSION_OFF); + + if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || + (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + pr_info("ena device version: %d.%d\n", + (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); + + if (ver < MIN_ENA_VER) { + pr_err("ENA version is lower than the minimal version the driver supports\n"); + return -1; + } + + pr_info("ena controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> + ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + + ctrl_ver_masked = + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); + + /* Validate the ctrl version without the implementation ID */ + if (ctrl_ver_masked < MIN_ENA_CTRL_VER) { + pr_err("ENA ctrl version is lower than the minimal ctrl version the driver supports\n"); + return -1; + } + + return 0; +} + +void ena_com_admin_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_com_admin_cq *cq = &admin_queue->cq; + struct ena_com_admin_sq *sq = &admin_queue->sq; + struct ena_com_aenq *aenq = &ena_dev->aenq; + u16 size; + + if (admin_queue->comp_ctx) + devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx); + admin_queue->comp_ctx = NULL; + size = ADMIN_SQ_SIZE(admin_queue->q_depth); + if (sq->entries) + dma_free_coherent(ena_dev->dmadev, size, sq->entries, + sq->dma_addr); + sq->entries = NULL; + + size = ADMIN_CQ_SIZE(admin_queue->q_depth); + if (cq->entries) + dma_free_coherent(ena_dev->dmadev, size, cq->entries, + cq->dma_addr); + cq->entries = NULL; + + size = ADMIN_AENQ_SIZE(aenq->q_depth); + if (ena_dev->aenq.entries) + dma_free_coherent(ena_dev->dmadev, size, aenq->entries, + aenq->dma_addr); + aenq->entries = NULL; +} + +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) +{ + ena_dev->admin_queue.polling = polling; +} + +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + spin_lock_init(&mmio_read->lock); + mmio_read->read_resp = + dma_zalloc_coherent(ena_dev->dmadev, + sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (unlikely(!mmio_read->read_resp)) + return -ENOMEM; + + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + mmio_read->read_resp->req_id = 0x0; + mmio_read->seq_num = 0x0; + mmio_read->readless_supported = true; + + return 0; +} + +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + mmio_read->readless_supported = readless_supported; +} + +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); + + dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), + mmio_read->read_resp, mmio_read->read_resp_dma_addr); + + mmio_read->read_resp = NULL; +} + +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + u32 addr_low, addr_high; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); +} + +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high; + int ret; + + dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) { + pr_err("Device isn't ready, abort com init\n"); + return -ENODEV; + } + + admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH; + + admin_queue->q_dmadev = ena_dev->dmadev; + admin_queue->polling = false; + admin_queue->curr_cmd_id = 0; + + atomic_set(&admin_queue->outstanding_cmds, 0); + + if (init_spinlock) + spin_lock_init(&admin_queue->q_lock); + + ret = ena_com_init_comp_ctxt(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_sq(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_cq(admin_queue); + if (ret) + goto error; + + admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + ENA_REGS_AQ_DB_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF); + + aq_caps = 0; + aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK; + aq_caps |= (sizeof(struct ena_admin_aq_entry) << + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + + acq_caps = 0; + acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; + acq_caps |= (sizeof(struct ena_admin_acq_entry) << + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; + + writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF); + writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF); + ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers); + if (ret) + goto error; + + admin_queue->running_state = true; + + return 0; +error: + ena_com_admin_destroy(ena_dev); + + return ret; +} + +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + int ret; + + if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", + ctx->qid, ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + io_sq = &ena_dev->io_sq_queues[ctx->qid]; + io_cq = &ena_dev->io_cq_queues[ctx->qid]; + + memset(io_sq, 0x0, sizeof(struct ena_com_io_sq)); + memset(io_cq, 0x0, sizeof(struct ena_com_io_cq)); + + /* Init CQ */ + io_cq->q_depth = ctx->queue_size; + io_cq->direction = ctx->direction; + io_cq->qid = ctx->qid; + + io_cq->msix_vector = ctx->msix_vector; + + io_sq->q_depth = ctx->queue_size; + io_sq->direction = ctx->direction; + io_sq->qid = ctx->qid; + + io_sq->mem_queue_type = ctx->mem_queue_type; + + if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + /* header length is limited to 8 bits */ + io_sq->tx_max_header_size = + min_t(u32, ena_dev->tx_max_header_size, SZ_256); + + ret = ena_com_init_io_sq(ena_dev, ctx, io_sq); + if (ret) + goto error; + ret = ena_com_init_io_cq(ena_dev, ctx, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_cq(ena_dev, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx); + if (ret) + goto destroy_io_cq; + + return 0; + +destroy_io_cq: + ena_com_destroy_io_cq(ena_dev, io_cq); +error: + ena_com_io_queue_free(ena_dev, io_sq, io_cq); + return ret; +} + +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", qid, + ENA_TOTAL_NUM_QUEUES); + return; + } + + io_sq = &ena_dev->io_sq_queues[qid]; + io_cq = &ena_dev->io_cq_queues[qid]; + + ena_com_destroy_io_sq(ena_dev, io_sq); + ena_com_destroy_io_cq(ena_dev, io_cq); + + ena_com_io_queue_free(ena_dev, io_sq, io_cq); +} + +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp) +{ + return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG); +} + +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_DEVICE_ATTRIBUTES); + if (rc) + return rc; + + memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr, + sizeof(get_resp.u.dev_attr)); + ena_dev->supported_features = get_resp.u.dev_attr.supported_features; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_NUM); + if (rc) + return rc; + + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, + sizeof(get_resp.u.max_queue)); + ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_AENQ_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq, + sizeof(get_resp.u.aenq)); + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->offload, &get_resp.u.offload, + sizeof(get_resp.u.offload)); + + return 0; +} + +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev) +{ + ena_com_handle_admin_completion(&ena_dev->admin_queue); +} + +/* ena_handle_specific_aenq_event: + * return the handler that is relevant to the specific event group + */ +static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev, + u16 group) +{ + struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers; + + if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group]) + return aenq_handlers->handlers[group]; + + return aenq_handlers->unimplemented_handler; +} + +/* ena_aenq_intr_handler: + * handles the aenq incoming events. + * pop events from the queue and apply the specific handler + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) +{ + struct ena_admin_aenq_entry *aenq_e; + struct ena_admin_aenq_common_desc *aenq_common; + struct ena_com_aenq *aenq = &dev->aenq; + ena_aenq_handler handler_cb; + u16 masked_head, processed = 0; + u8 phase; + + masked_head = aenq->head & (aenq->q_depth - 1); + phase = aenq->phase; + aenq_e = &aenq->entries[masked_head]; /* Get first entry */ + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ + while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == + phase) { + pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", + aenq_common->group, aenq_common->syndrom, + (u64)aenq_common->timestamp_low + + ((u64)aenq_common->timestamp_high << 32)); + + /* Handle specific event*/ + handler_cb = ena_com_get_specific_aenq_cb(dev, + aenq_common->group); + handler_cb(data, aenq_e); /* call the actual event handler*/ + + /* Get next event entry */ + masked_head++; + processed++; + + if (unlikely(masked_head == aenq->q_depth)) { + masked_head = 0; + phase = !phase; + } + aenq_e = &aenq->entries[masked_head]; + aenq_common = &aenq_e->aenq_common_desc; + } + + aenq->head += processed; + aenq->phase = phase; + + /* Don't update aenq doorbell if there weren't any processed events */ + if (!processed) + return; + + /* write the aenq doorbell after all AENQ descriptors were read */ + mb(); + writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_dev_reset(struct ena_com_dev *ena_dev) +{ + u32 stat, timeout, cap, reset_val; + int rc; + + stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + + if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || + (cap == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read32 timeout occurred\n"); + return -ETIME; + } + + if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) { + pr_err("Device isn't ready, can't reset device\n"); + return -EINVAL; + } + + timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >> + ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + if (timeout == 0) { + pr_err("Invalid timeout value\n"); + return -EINVAL; + } + + /* start reset */ + reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK; + writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + + /* Write again the MMIO read request address */ + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + rc = wait_for_reset_state(ena_dev, timeout, + ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (rc != 0) { + pr_err("Reset indication didn't turn on\n"); + return rc; + } + + /* reset done */ + writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + rc = wait_for_reset_state(ena_dev, timeout, 0); + if (rc != 0) { + pr_err("Reset indication didn't turn off\n"); + return rc; + } + + return 0; +} + +static int ena_get_dev_stats(struct ena_com_dev *ena_dev, + struct ena_com_stats_ctx *ctx, + enum ena_admin_get_stats_type type) +{ + struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd; + struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp; + struct ena_com_admin_queue *admin_queue; + int ret; + + admin_queue = &ena_dev->admin_queue; + + get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS; + get_cmd->aq_common_descriptor.flags = 0; + get_cmd->type = type; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)get_cmd, + sizeof(*get_cmd), + (struct ena_admin_acq_entry *)get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to get stats. error: %d\n", ret); + + return ret; +} + +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats) +{ + struct ena_com_stats_ctx ctx; + int ret; + + memset(&ctx, 0x0, sizeof(ctx)); + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC); + if (likely(ret == 0)) + memcpy(stats, &ctx.get_resp.basic_stats, + sizeof(ctx.get_resp.basic_stats)); + + return ret; +} + +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_MTU; + cmd.u.mtu.mtu = mtu; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set mtu %d. error: %d\n", mtu, ret); + + return ret; +} + +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload) +{ + int ret; + struct ena_admin_get_feat_resp resp; + + ret = ena_com_get_feature(ena_dev, &resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (unlikely(ret)) { + pr_err("Failed to get offload capabilities %d\n", ret); + return ret; + } + + memcpy(offload, &resp.u.offload, sizeof(resp.u.offload)); + + return 0; +} + +int ena_com_set_hash_function(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_FUNCTION)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_HASH_FUNCTION); + return -EPERM; + } + + /* Validate hash function is supported */ + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION); + if (unlikely(ret)) + return ret; + + if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { + pr_err("Func hash %d isn't supported by device, abort\n", + rss->hash_func); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION; + cmd.u.flow_hash_func.init_val = rss->hash_init_val; + cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_key_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = sizeof(*rss->hash_key); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) { + pr_err("Failed to set hash function %d. error: %d\n", + rss->hash_func, ret); + return -EINVAL; + } + + return 0; +} + +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + /* Make sure size is a mult of DWs */ + if (unlikely(key_len & 0x3)) + return -EINVAL; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { + pr_err("Flow hash function %d isn't supported\n", func); + return -EPERM; + } + + switch (func) { + case ENA_ADMIN_TOEPLITZ: + if (key_len > sizeof(hash_key->key)) { + pr_err("key len (%hu) is bigger than the max supported (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; + break; + case ENA_ADMIN_CRC32: + rss->hash_init_val = init_val; + break; + default: + pr_err("Invalid hash function (%d)\n", func); + return -EINVAL; + } + + rc = ena_com_set_hash_function(ena_dev); + + /* Restore the old function */ + if (unlikely(rc)) + ena_com_get_hash_function(ena_dev, NULL, NULL); + + return rc; +} + +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + rss->hash_func = get_resp.u.flow_hash_func.selected_func; + if (func) + *func = rss->hash_func; + + if (key) + memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2); + + return 0; +} + +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_INPUT, + rss->hash_ctrl_dma_addr, + sizeof(*rss->hash_ctrl)); + if (unlikely(rc)) + return rc; + + if (fields) + *fields = rss->hash_ctrl->selected_fields[proto].fields; + + return 0; +} + +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_INPUT)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT; + cmd.u.flow_hash_input.enabled_input_sort = + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK | + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_ctrl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + cmd.control_buffer.length = sizeof(*hash_ctrl); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) + pr_err("Failed to set hash input. error: %d\n", ret); + + return ret; +} + +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = + rss->hash_ctrl; + u16 available_fields = 0; + int rc, i; + + /* Get the supported hash input */ + rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL); + if (unlikely(rc)) + return rc; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA; + + for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) { + available_fields = hash_ctrl->selected_fields[i].fields & + hash_ctrl->supported_fields[i].fields; + if (available_fields != hash_ctrl->selected_fields[i].fields) { + pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n", + i, hash_ctrl->supported_fields[i].fields, + hash_ctrl->selected_fields[i].fields); + return -EPERM; + } + } + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return rc; +} + +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + u16 supported_fields; + int rc; + + if (proto >= ENA_ADMIN_RSS_PROTO_NUM) { + pr_err("Invalid proto num (%u)\n", proto); + return -EINVAL; + } + + /* Get the ctrl table */ + rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL); + if (unlikely(rc)) + return rc; + + /* Make sure all the fields are supported */ + supported_fields = hash_ctrl->supported_fields[proto].fields; + if ((hash_fields & supported_fields) != hash_fields) { + pr_err("proto %d doesn't support the required fields %x. supports only: %x\n", + proto, hash_fields, supported_fields); + } + + hash_ctrl->selected_fields[proto].fields = hash_fields; + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return 0; +} + +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (unlikely(entry_idx >= (1 << rss->tbl_log_size))) + return -EINVAL; + + if (unlikely((entry_value > ENA_TOTAL_NUM_QUEUES))) + return -EINVAL; + + rss->host_rss_ind_tbl[entry_idx] = entry_value; + + return 0; +} + +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id( + ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + return -EPERM; + } + + ret = ena_com_ind_tbl_convert_to_device(ena_dev); + if (ret) { + pr_err("Failed to convert host indirection table to device table\n"); + return ret; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG; + cmd.u.ind_table.size = rss->tbl_log_size; + cmd.u.ind_table.inline_index = 0xFFFFFFFF; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->rss_ind_tbl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set indirect table. error: %d\n", ret); + + return ret; +} + +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + u32 tbl_size; + int i, rc; + + tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, + rss->rss_ind_tbl_dma_addr, + tbl_size); + if (unlikely(rc)) + return rc; + + if (!ind_tbl) + return 0; + + rc = ena_com_ind_tbl_convert_from_device(ena_dev); + if (unlikely(rc)) + return rc; + + for (i = 0; i < (1 << rss->tbl_log_size); i++) + ind_tbl[i] = rss->host_rss_ind_tbl[i]; + + return 0; +} + +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) +{ + int rc; + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); + + rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size); + if (unlikely(rc)) + goto err_indr_tbl; + + rc = ena_com_hash_key_allocate(ena_dev); + if (unlikely(rc)) + goto err_hash_key; + + rc = ena_com_hash_ctrl_init(ena_dev); + if (unlikely(rc)) + goto err_hash_ctrl; + + return 0; + +err_hash_ctrl: + ena_com_hash_key_destroy(ena_dev); +err_hash_key: + ena_com_indirect_table_destroy(ena_dev); +err_indr_tbl: + + return rc; +} + +void ena_com_rss_destroy(struct ena_com_dev *ena_dev) +{ + ena_com_indirect_table_destroy(ena_dev); + ena_com_hash_key_destroy(ena_dev); + ena_com_hash_ctrl_destroy(ena_dev); + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); +} + +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->host_info = + dma_zalloc_coherent(ena_dev->dmadev, SZ_4K, + &host_attr->host_info_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->host_info)) + return -ENOMEM; + + return 0; +} + +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->debug_area_virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, debug_area_size, + &host_attr->debug_area_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->debug_area_virt_addr)) { + host_attr->debug_area_size = 0; + return -ENOMEM; + } + + host_attr->debug_area_size = debug_area_size; + + return 0; +} + +void ena_com_delete_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->host_info) { + dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info, + host_attr->host_info_dma_addr); + host_attr->host_info = NULL; + } +} + +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->debug_area_virt_addr) { + dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size, + host_attr->debug_area_virt_addr, + host_attr->debug_area_dma_addr); + host_attr->debug_area_virt_addr = NULL; + } +} + +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_HOST_ATTR_CONFIG)) { + pr_warn("Set host attribute isn't supported\n"); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.debug_ba, + host_attr->debug_area_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.os_info_ba, + host_attr->host_info_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.u.host_attr.debug_area_size = host_attr->debug_area_size; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set host attributes: %d\n", ret); + + return ret; +} + +/* Interrupt moderation */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev) +{ + return ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_INTERRUPT_MODERATION); +} + +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + ena_dev->intr_moder_tx_interval = tx_coalesce_usecs / + ena_dev->intr_delay_resolution; + + return 0; +} + +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + /* We use LOWEST entry of moderation table for storing + * nonadaptive interrupt coalescing values + */ + ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + rx_coalesce_usecs / ena_dev->intr_delay_resolution; + + return 0; +} + +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + if (ena_dev->intr_moder_tbl) + devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl); + ena_dev->intr_moder_tbl = NULL; +} + +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + struct ena_admin_get_feat_resp get_resp; + u16 delay_resolution; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_INTERRUPT_MODERATION); + + if (rc) { + if (rc == -EPERM) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_INTERRUPT_MODERATION); + rc = 0; + } else { + pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n", + rc); + } + + /* no moderation supported, disable adaptive support */ + ena_com_disable_adaptive_moderation(ena_dev); + return rc; + } + + rc = ena_com_init_interrupt_moderation_table(ena_dev); + if (rc) + goto err; + + /* if moderation is supported by device we set adaptive moderation */ + delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; + ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); + ena_com_enable_adaptive_moderation(ena_dev); + + return 0; +err: + ena_com_destroy_interrupt_moderation(ena_dev); + return rc; +} + +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (!intr_moder_tbl) + return; + + intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + ENA_INTR_LOWEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval = + ENA_INTR_LOWEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval = + ENA_INTR_LOWEST_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval = + ENA_INTR_LOW_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval = + ENA_INTR_LOW_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval = + ENA_INTR_LOW_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval = + ENA_INTR_MID_USECS; + intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval = + ENA_INTR_MID_PKTS; + intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval = + ENA_INTR_MID_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval = + ENA_INTR_HIGH_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval = + ENA_INTR_HIGH_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval = + ENA_INTR_HIGH_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval = + ENA_INTR_HIGHEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval = + ENA_INTR_HIGHEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval = + ENA_INTR_HIGHEST_BYTES; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev) +{ + return ena_dev->intr_moder_tx_interval; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (intr_moder_tbl) + return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval; + + return 0; +} + +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval; + if (ena_dev->intr_delay_resolution) + intr_moder_tbl[level].intr_moder_interval /= + ena_dev->intr_delay_resolution; + intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval; + + /* use hardcoded value until ethtool supports bytecount parameter */ + if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED) + intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval; +} + +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval; + if (ena_dev->intr_delay_resolution) + entry->intr_moder_interval *= ena_dev->intr_delay_resolution; + entry->pkts_per_interval = + intr_moder_tbl[level].pkts_per_interval; + entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h new file mode 100644 index 000000000000..509d7b8e15ab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -0,0 +1,1038 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_COM +#define ENA_COM + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_common_defs.h" +#include "ena_admin_defs.h" +#include "ena_eth_io_defs.h" +#include "ena_regs_defs.h" + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define ENA_MAX_NUM_IO_QUEUES 128U +/* We need to queues for each IO (on for Tx and one for Rx) */ +#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) + +#define ENA_MAX_HANDLERS 256 + +#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48 + +/* Unit in usec */ +#define ENA_REG_READ_TIMEOUT 200000 + +#define ADMIN_SQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aq_entry)) +#define ADMIN_CQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_acq_entry)) +#define ADMIN_AENQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aenq_entry)) + +/*****************************************************************************/ +/*****************************************************************************/ +/* ENA adaptive interrupt moderation settings */ + +#define ENA_INTR_LOWEST_USECS (0) +#define ENA_INTR_LOWEST_PKTS (3) +#define ENA_INTR_LOWEST_BYTES (2 * 1524) + +#define ENA_INTR_LOW_USECS (32) +#define ENA_INTR_LOW_PKTS (12) +#define ENA_INTR_LOW_BYTES (16 * 1024) + +#define ENA_INTR_MID_USECS (80) +#define ENA_INTR_MID_PKTS (48) +#define ENA_INTR_MID_BYTES (64 * 1024) + +#define ENA_INTR_HIGH_USECS (128) +#define ENA_INTR_HIGH_PKTS (96) +#define ENA_INTR_HIGH_BYTES (128 * 1024) + +#define ENA_INTR_HIGHEST_USECS (192) +#define ENA_INTR_HIGHEST_PKTS (128) +#define ENA_INTR_HIGHEST_BYTES (192 * 1024) + +#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196 +#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 4 +#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT 6 +#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT 4 +#define ENA_INTR_MODER_LEVEL_STRIDE 2 +#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF + +enum ena_intr_moder_level { + ENA_INTR_MODER_LOWEST = 0, + ENA_INTR_MODER_LOW, + ENA_INTR_MODER_MID, + ENA_INTR_MODER_HIGH, + ENA_INTR_MODER_HIGHEST, + ENA_INTR_MAX_NUM_OF_LEVELS, +}; + +struct ena_intr_moder_entry { + unsigned int intr_moder_interval; + unsigned int pkts_per_interval; + unsigned int bytes_per_interval; +}; + +enum queue_direction { + ENA_COM_IO_QUEUE_DIRECTION_TX, + ENA_COM_IO_QUEUE_DIRECTION_RX +}; + +struct ena_com_buf { + dma_addr_t paddr; /**< Buffer physical address */ + u16 len; /**< Buffer length in bytes */ +}; + +struct ena_com_rx_buf_info { + u16 len; + u16 req_id; +}; + +struct ena_com_io_desc_addr { + u8 __iomem *pbuf_dev_addr; /* LLQ address */ + u8 *virt_addr; + dma_addr_t phys_addr; +}; + +struct ena_com_tx_meta { + u16 mss; + u16 l3_hdr_len; + u16 l3_hdr_offset; + u16 l4_hdr_len; /* In words */ +}; + +struct ena_com_io_cq { + struct ena_com_io_desc_addr cdesc_addr; + + /* Interrupt unmask register */ + u32 __iomem *unmask_reg; + + /* The completion queue head doorbell register */ + u32 __iomem *cq_head_db_reg; + + /* numa configuration register (for TPH) */ + u32 __iomem *numa_node_cfg_reg; + + /* The value to write to the above register to unmask + * the interrupt of this queue + */ + u32 msix_vector; + + enum queue_direction direction; + + /* holds the number of cdesc of the current packet */ + u16 cur_rx_pkt_cdesc_count; + /* save the firt cdesc idx of the current packet */ + u16 cur_rx_pkt_cdesc_start_idx; + + u16 q_depth; + /* Caller qid */ + u16 qid; + + /* Device queue index */ + u16 idx; + u16 head; + u16 last_head_update; + u8 phase; + u8 cdesc_entry_size_in_bytes; + +} ____cacheline_aligned; + +struct ena_com_io_sq { + struct ena_com_io_desc_addr desc_addr; + + u32 __iomem *db_addr; + u8 __iomem *header_addr; + + enum queue_direction direction; + enum ena_admin_placement_policy_type mem_queue_type; + + u32 msix_vector; + struct ena_com_tx_meta cached_tx_meta; + + u16 q_depth; + u16 qid; + + u16 idx; + u16 tail; + u16 next_to_comp; + u32 tx_max_header_size; + u8 phase; + u8 desc_entry_size; + u8 dma_addr_bits; +} ____cacheline_aligned; + +struct ena_com_admin_cq { + struct ena_admin_acq_entry *entries; + dma_addr_t dma_addr; + + u16 head; + u8 phase; +}; + +struct ena_com_admin_sq { + struct ena_admin_aq_entry *entries; + dma_addr_t dma_addr; + + u32 __iomem *db_addr; + + u16 head; + u16 tail; + u8 phase; + +}; + +struct ena_com_stats_admin { + u32 aborted_cmd; + u32 submitted_cmd; + u32 completed_cmd; + u32 out_of_space; + u32 no_completion; +}; + +struct ena_com_admin_queue { + void *q_dmadev; + spinlock_t q_lock; /* spinlock for the admin queue */ + struct ena_comp_ctx *comp_ctx; + u16 q_depth; + struct ena_com_admin_cq cq; + struct ena_com_admin_sq sq; + + /* Indicate if the admin queue should poll for completion */ + bool polling; + + u16 curr_cmd_id; + + /* Indicate that the ena was initialized and can + * process new admin commands + */ + bool running_state; + + /* Count the number of outstanding admin commands */ + atomic_t outstanding_cmds; + + struct ena_com_stats_admin stats; +}; + +struct ena_aenq_handlers; + +struct ena_com_aenq { + u16 head; + u8 phase; + struct ena_admin_aenq_entry *entries; + dma_addr_t dma_addr; + u16 q_depth; + struct ena_aenq_handlers *aenq_handlers; +}; + +struct ena_com_mmio_read { + struct ena_admin_ena_mmio_req_read_less_resp *read_resp; + dma_addr_t read_resp_dma_addr; + u16 seq_num; + bool readless_supported; + /* spin lock to ensure a single outstanding read */ + spinlock_t lock; +}; + +struct ena_rss { + /* Indirect table */ + u16 *host_rss_ind_tbl; + struct ena_admin_rss_ind_table_entry *rss_ind_tbl; + dma_addr_t rss_ind_tbl_dma_addr; + u16 tbl_log_size; + + /* Hash key */ + enum ena_admin_hash_functions hash_func; + struct ena_admin_feature_rss_flow_hash_control *hash_key; + dma_addr_t hash_key_dma_addr; + u32 hash_init_val; + + /* Flow Control */ + struct ena_admin_feature_rss_hash_control *hash_ctrl; + dma_addr_t hash_ctrl_dma_addr; + +}; + +struct ena_host_attribute { + /* Debug area */ + u8 *debug_area_virt_addr; + dma_addr_t debug_area_dma_addr; + u32 debug_area_size; + + /* Host information */ + struct ena_admin_host_info *host_info; + dma_addr_t host_info_dma_addr; +}; + +/* Each ena_dev is a PCI function. */ +struct ena_com_dev { + struct ena_com_admin_queue admin_queue; + struct ena_com_aenq aenq; + struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES]; + struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES]; + u8 __iomem *reg_bar; + void __iomem *mem_bar; + void *dmadev; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + u32 tx_max_header_size; + u16 stats_func; /* Selected function for extended statistic dump */ + u16 stats_queue; /* Selected queue for extended statistic dump */ + + struct ena_com_mmio_read mmio_read; + + struct ena_rss rss; + u32 supported_features; + u32 dma_addr_bits; + + struct ena_host_attribute host_attr; + bool adaptive_coalescing; + u16 intr_delay_resolution; + u32 intr_moder_tx_interval; + struct ena_intr_moder_entry *intr_moder_tbl; +}; + +struct ena_com_dev_get_features_ctx { + struct ena_admin_queue_feature_desc max_queues; + struct ena_admin_device_attr_feature_desc dev_attr; + struct ena_admin_feature_aenq_desc aenq; + struct ena_admin_feature_offload_desc offload; +}; + +struct ena_com_create_io_ctx { + enum ena_admin_placement_policy_type mem_queue_type; + enum queue_direction direction; + int numa_node; + u32 msix_vector; + u16 queue_size; + u16 qid; +}; + +typedef void (*ena_aenq_handler)(void *data, + struct ena_admin_aenq_entry *aenq_e); + +/* Holds aenq handlers. Indexed by AENQ event group */ +struct ena_aenq_handlers { + ena_aenq_handler handlers[ENA_MAX_HANDLERS]; + ena_aenq_handler unimplemented_handler; +}; + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * + * Initialize the register read mechanism. + * + * @note: This method must be the first stage in the initialization sequence. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev); + +/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * @readless_supported: readless mode (enable/disable) + */ +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, + bool readless_supported); + +/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return + * value physical address. + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev); + +/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_admin_init - Init the admin and the async queues + * @ena_dev: ENA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. + * @init_spinlock: Indicate if this method should init the admin spinlock or + * the spinlock was init before (for example, in a case of FLR). + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock); + +/* ena_com_admin_destroy - Destroy the admin and the async events queues. + * @ena_dev: ENA communication layer struct + * + * @note: Before calling this method, the caller must validate that the device + * won't send any additional admin completions/aenq. + * To achieve that, a FLR is recommended. + */ +void ena_com_admin_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_dev_reset - Perform device FLR to the device. + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_dev_reset(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_queue - Create io queue. + * @ena_dev: ENA communication layer struct + * @ctx - create context structure + * + * Create the submission and the completion queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx); + +/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid. + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + */ +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid); + +/* ena_com_get_io_handlers - Return the io queue handlers + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + * @io_sq - IO submission queue handler + * @io_cq - IO completion queue handler. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq); + +/* ena_com_admin_aenq_enable - ENAble asynchronous event notifications + * @ena_dev: ENA communication layer struct + * + * After this method, aenq event can be received via AENQ. + */ +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_running_state - Set the state of the admin queue + * @ena_dev: ENA communication layer struct + * + * Change the state of the admin queue (enable/disable) + */ +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state); + +/* ena_com_get_admin_running_state - Get the admin queue state + * @ena_dev: ENA communication layer struct + * + * Retrieve the state of the admin queue (enable/disable) + * + * @return - current polling mode (enable/disable) + */ +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * @polling: ENAble/Disable polling mode + * + * Set the admin completion mode. + */ +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling); + +/* ena_com_set_admin_polling_mode - Get the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * + * Get the admin completion mode. + * If polling mode is on, ena_com_execute_admin_command will perform a + * polling on the admin completion queue for the commands completion, + * otherwise it will wait on wait event. + * + * @return state + */ +bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); + +/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the admin completion queue and wake up all the pending + * threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. + */ +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); + +/* ena_com_aenq_intr_handler - AENQ interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the async event notification queue and call the proper + * aenq handler. + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); + +/* ena_com_abort_admin_commands - Abort all the outstanding admin commands. + * @ena_dev: ENA communication layer struct + * + * This method aborts all the outstanding admin commands. + * The caller should then call ena_com_wait_for_abort_completion to make sure + * all the commands were completed. + */ +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev); + +/* ena_com_wait_for_abort_completion - Wait for admin commands abort. + * @ena_dev: ENA communication layer struct + * + * This method wait until all the outstanding admin commands will be completed. + */ +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev); + +/* ena_com_validate_version - Validate the device parameters + * @ena_dev: ENA communication layer struct + * + * This method validate the device parameters are the same as the saved + * parameters in ena_dev. + * This method is useful after device reset, to validate the device mac address + * and the device offloads are the same as before the reset. + * + * @return - 0 on success negative value otherwise. + */ +int ena_com_validate_version(struct ena_com_dev *ena_dev); + +/* ena_com_get_link_params - Retrieve physical link parameters. + * @ena_dev: ENA communication layer struct + * @resp: Link parameters + * + * Retrieve the physical link parameters, + * like speed, auto-negotiation and full duplex support. + * + * @return - 0 on Success negative value otherwise. + */ +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp); + +/* ena_com_get_dma_width - Retrieve physical dma address width the device + * supports. + * @ena_dev: ENA communication layer struct + * + * Retrieve the maximum physical address bits the device can handle. + * + * @return: > 0 on Success and negative value otherwise. + */ +int ena_com_get_dma_width(struct ena_com_dev *ena_dev); + +/* ena_com_set_aenq_config - Set aenq groups configurations + * @ena_dev: ENA communication layer struct + * @groups flag: bit fields flags of enum ena_admin_aenq_group. + * + * Configure which aenq event group the driver would like to receive. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag); + +/* ena_com_get_dev_attr_feat - Get device features + * @ena_dev: ENA communication layer struct + * @get_feat_ctx: returned context that contain the get features. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx); + +/* ena_com_get_dev_basic_stats - Get device basic statistics + * @ena_dev: ENA communication layer struct + * @stats: stats return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats); + +/* ena_com_set_dev_mtu - Configure the device mtu. + * @ena_dev: ENA communication layer struct + * @mtu: mtu value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu); + +/* ena_com_get_offload_settings - Retrieve the device offloads capabilities + * @ena_dev: ENA communication layer struct + * @offlad: offload return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload); + +/* ena_com_rss_init - Init RSS + * @ena_dev: ENA communication layer struct + * @log_size: indirection log size + * + * Allocate RSS/RFS resources. + * The caller then can configure rss using ena_com_set_hash_function, + * ena_com_set_hash_ctrl and ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); + +/* ena_com_rss_destroy - Destroy rss + * @ena_dev: ENA communication layer struct + * + * Free all the RSS/RFS resources. + */ +void ena_com_rss_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_fill_hash_function - Fill RSS hash function + * @ena_dev: ENA communication layer struct + * @func: The hash function (Toeplitz or crc) + * @key: Hash key (for toeplitz hash) + * @key_len: key length (max length 10 DW) + * @init_val: initial value for the hash function + * + * Fill the ena_dev resources with the desire hash function, hash key, key_len + * and key initial value (if needed by the hash function). + * To flush the key into the device the caller should call + * ena_com_set_hash_function. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val); + +/* ena_com_set_hash_function - Flush the hash function and it dependencies to + * the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash function and it dependencies (key, key length and + * initial value) if needed. + * + * @note: Prior to this method the caller should call ena_com_fill_hash_function + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_function(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_function - Retrieve the hash function and the hash key + * from the device. + * @ena_dev: ENA communication layer struct + * @func: hash function + * @key: hash key + * + * Retrieve the hash function and the hash key from the device. + * + * @note: If the caller called ena_com_fill_hash_function but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key); + +/* ena_com_fill_hash_ctrl - Fill RSS hash control + * @ena_dev: ENA communication layer struct. + * @proto: The protocol to configure. + * @hash_fields: bit mask of ena_admin_flow_hash_fields + * + * Fill the ena_dev resources with the desire hash control (the ethernet + * fields that take part of the hash) for a specific protocol. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields); + +/* ena_com_set_hash_ctrl - Flush the hash control resources to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash control (the ethernet fields that take part of the hash) + * + * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_ctrl - Retrieve the hash control from the device. + * @ena_dev: ENA communication layer struct + * @proto: The protocol to retrieve. + * @fields: bit mask of ena_admin_flow_hash_fields. + * + * Retrieve the hash control from the device. + * + * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields); + +/* ena_com_set_default_hash_ctrl - Set the hash control to a default + * configuration. + * @ena_dev: ENA communication layer struct + * + * Fill the ena_dev resources with the default hash control configuration. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS + * indirection table + * @ena_dev: ENA communication layer struct. + * @entry_idx - indirection table entry. + * @entry_value - redirection value + * + * Fill a single entry of the RSS indirection table in the ena_dev resources. + * To flush the indirection table to the device, the called should call + * ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value); + +/* ena_com_indirect_table_set - Flush the indirection table to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the indirection hash control to the device. + * Prior to this method the caller should call ena_com_indirect_table_fill_entry + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_get - Retrieve the indirection table from the device. + * @ena_dev: ENA communication layer struct + * @ind_tbl: indirection table + * + * Retrieve the RSS indirection table from the device. + * + * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl); + +/* ena_com_allocate_host_info - Allocate host info resources. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_allocate_debug_area - Allocate debug area. + * @ena_dev: ENA communication layer struct + * @debug_area_size - debug area size. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size); + +/* ena_com_delete_debug_area - Free the debug area resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate debug area. + */ +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); + +/* ena_com_delete_host_info - Free the host info resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate host info. + */ +void ena_com_delete_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_set_host_attributes - Update the device with the host + * attributes (debug area and host info) base address. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_cq - Create io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Create IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_destroy_io_cq - Destroy io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Destroy IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_execute_admin_command - Execute admin command + * @admin_queue: admin queue. + * @cmd: the admin command to execute. + * @cmd_size: the command size. + * @cmd_completion: command completion return value. + * @cmd_comp_size: command completion size. + + * Submit an admin command and then wait until the device will return a + * completion. + * The completion will be copyed into cmd_comp. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *cmd_comp, + size_t cmd_comp_size); + +/* ena_com_init_interrupt_moderation - Init interrupt moderation + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources + * @ena_dev: ENA communication layer struct + */ +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_interrupt_moderation_supported - Return if interrupt moderation + * capability is supported by the device. + * + * @return - supported or not. + */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev); + +/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt + * moderation table back to the default parameters. + * @ena_dev: ENA communication layer struct + */ +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev); + +/* ena_com_update_nonadaptive_moderation_interval_tx - Update the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * @tx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs); + +/* ena_com_update_nonadaptive_moderation_interval_rx - Update the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * @rx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs); + +/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev); + +/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev); + +/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt + * moderation table. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry value + * + * Update a single entry in the interrupt moderation table. + */ +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry to fill. + * + * Initialize the entry according to the adaptive interrupt moderation table. + */ +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev) +{ + return ena_dev->adaptive_coalescing; +} + +static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = true; +} + +static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = false; +} + +/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay + * @ena_dev: ENA communication layer struct + * @pkts: Number of packets since the last update + * @bytes: Number of bytes received since the last update. + * @smoothed_interval: Returned interval + * @moder_tbl_idx: Current table level as input update new level as return + * value. + */ +static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev, + unsigned int pkts, + unsigned int bytes, + unsigned int *smoothed_interval, + unsigned int *moder_tbl_idx) +{ + enum ena_intr_moder_level curr_moder_idx, new_moder_idx; + struct ena_intr_moder_entry *curr_moder_entry; + struct ena_intr_moder_entry *pred_moder_entry; + struct ena_intr_moder_entry *new_moder_entry; + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int interval; + + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + if (!pkts || !bytes) + /* Tx interrupt, or spurious interrupt, + * in both cases we just use same delay values + */ + return; + + curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx); + if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) { + pr_err("Wrong moderation index %u\n", curr_moder_idx); + return; + } + + curr_moder_entry = &intr_moder_tbl[curr_moder_idx]; + new_moder_idx = curr_moder_idx; + + if (curr_moder_idx == ENA_INTR_MODER_LOWEST) { + if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } else { + pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE]; + + if ((pkts <= pred_moder_entry->pkts_per_interval) || + (bytes <= pred_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE); + else if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) { + if (curr_moder_idx != ENA_INTR_MODER_HIGHEST) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } + } + new_moder_entry = &intr_moder_tbl[new_moder_idx]; + + interval = new_moder_entry->intr_moder_interval; + *smoothed_interval = ( + (interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT + + ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) / + 10; + + *moder_tbl_idx = new_moder_idx; +} + +/* ena_com_update_intr_reg - Prepare interrupt register + * @intr_reg: interrupt register to update. + * @rx_delay_interval: Rx interval in usecs + * @tx_delay_interval: Tx interval in usecs + * @unmask: unask enable/disable + * + * Prepare interrupt update register with the supplied parameters. + */ +static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg, + u32 rx_delay_interval, + u32 tx_delay_interval, + bool unmask) +{ + intr_reg->intr_control = 0; + intr_reg->intr_control |= rx_delay_interval & + ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK; + + intr_reg->intr_control |= + (tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT) + & ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK; + + if (unmask) + intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK; +} + +#endif /* !(ENA_COM) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h new file mode 100644 index 000000000000..bb8d73676eab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h @@ -0,0 +1,48 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_COMMON_H_ +#define _ENA_COMMON_H_ + +#define ENA_COMMON_SPEC_VERSION_MAJOR 0 /* */ +#define ENA_COMMON_SPEC_VERSION_MINOR 10 /* */ + +/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */ +struct ena_common_mem_addr { + u32 mem_addr_low; + + u16 mem_addr_high; + + /* MBZ */ + u16 reserved16; +}; + +#endif /*_ENA_COMMON_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c new file mode 100644 index 000000000000..539c536464a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -0,0 +1,501 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_eth_com.h" + +static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( + struct ena_com_io_cq *io_cq) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 expected_phase, head_masked; + u16 desc_phase; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr + + (head_masked * io_cq->cdesc_entry_size_in_bytes)); + + desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; + + if (desc_phase != expected_phase) + return NULL; + + return cdesc; +} + +static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq) +{ + io_cq->head++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0)) + io_cq->phase ^= 1; +} + +static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked; + u32 offset; + + tail_masked = io_sq->tail & (io_sq->q_depth - 1); + + offset = tail_masked * io_sq->desc_entry_size; + + return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); +} + +static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u32 offset = tail_masked * io_sq->desc_entry_size; + + /* In case this queue isn't a LLQ */ + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return; + + memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset, + io_sq->desc_addr.virt_addr + offset, + io_sq->desc_entry_size); +} + +static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +{ + io_sq->tail++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) + io_sq->phase ^= 1; +} + +static inline int ena_com_write_header(struct ena_com_io_sq *io_sq, + u8 *head_src, u16 header_len) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u8 __iomem *dev_head_addr = + io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return 0; + + if (unlikely(!io_sq->header_addr)) { + pr_err("Push buffer header ptr is NULL\n"); + return -EINVAL; + } + + memcpy_toio(dev_head_addr, head_src, header_len); + + return 0; +} + +static inline struct ena_eth_io_rx_cdesc_base * + ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx) +{ + idx &= (io_cq->q_depth - 1); + return (struct ena_eth_io_rx_cdesc_base *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + idx * io_cq->cdesc_entry_size_in_bytes); +} + +static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, + u16 *first_cdesc_idx) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 count = 0, head_masked; + u32 last = 0; + + do { + cdesc = ena_com_get_next_rx_cdesc(io_cq); + if (!cdesc) + break; + + ena_com_cq_inc_head(io_cq); + count++; + last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; + } while (!last); + + if (last) { + *first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx; + count += io_cq->cur_rx_pkt_cdesc_count; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + + io_cq->cur_rx_pkt_cdesc_count = 0; + io_cq->cur_rx_pkt_cdesc_start_idx = head_masked; + + pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n", + io_cq->qid, *first_cdesc_idx, count); + } else { + io_cq->cur_rx_pkt_cdesc_count += count; + count = 0; + } + + return count; +} + +static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + int rc; + + if (ena_tx_ctx->meta_valid) { + rc = memcmp(&io_sq->cached_tx_meta, + &ena_tx_ctx->ena_meta, + sizeof(struct ena_com_tx_meta)); + + if (unlikely(rc != 0)) + return true; + } + + return false; +} + +static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_eth_io_tx_meta_desc *meta_desc = NULL; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + + meta_desc = get_sq_desc(io_sq); + memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc)); + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK; + + /* bits 0-9 of the mss */ + meta_desc->word2 |= (ena_meta->mss << + ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK; + /* bits 10-13 of the mss */ + meta_desc->len_ctrl |= ((ena_meta->mss >> 10) << + ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK; + + /* Extended meta desc */ + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK; + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + meta_desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_META_DESC_PHASE_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK; + meta_desc->word2 |= ena_meta->l3_hdr_len & + ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK; + meta_desc->word2 |= (ena_meta->l3_hdr_offset << + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK; + + meta_desc->word2 |= (ena_meta->l4_hdr_len << + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + + /* Cached the meta desc */ + memcpy(&io_sq->cached_tx_meta, ena_meta, + sizeof(struct ena_com_tx_meta)); + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); +} + +static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, + struct ena_eth_io_rx_cdesc_base *cdesc) +{ + ena_rx_ctx->l3_proto = cdesc->status & + ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK; + ena_rx_ctx->l4_proto = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; + ena_rx_ctx->l3_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + ena_rx_ctx->l4_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + ena_rx_ctx->hash = cdesc->hash; + ena_rx_ctx->frag = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT; + + pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n", + ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, + ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err, + ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); +} + +/*****************************************************************************/ +/***************************** API **********************************/ +/*****************************************************************************/ + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc) +{ + struct ena_eth_io_tx_desc *desc = NULL; + struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs; + void *push_header = ena_tx_ctx->push_header; + u16 header_len = ena_tx_ctx->header_len; + u16 num_bufs = ena_tx_ctx->num_bufs; + int total_desc, i, rc; + bool have_meta; + u64 addr_hi; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type"); + + /* num_bufs +1 for potential meta desc */ + if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) { + pr_err("Not enough space in the tx queue\n"); + return -ENOMEM; + } + + if (unlikely(header_len > io_sq->tx_max_header_size)) { + pr_err("header size is too large %d max header: %d\n", + header_len, io_sq->tx_max_header_size); + return -EINVAL; + } + + /* start with pushing the header (if needed) */ + rc = ena_com_write_header(io_sq, push_header, header_len); + if (unlikely(rc)) + return rc; + + have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq, + ena_tx_ctx); + if (have_meta) + ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); + + /* If the caller doesn't want send packets */ + if (unlikely(!num_bufs && !header_len)) { + *nb_hw_desc = have_meta ? 0 : 1; + return 0; + } + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + /* Set first desc when we don't have meta descriptor */ + if (!have_meta) + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK; + + desc->buff_addr_hi_hdr_sz |= (header_len << + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) & + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK; + desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK; + + /* Bits 0-9 */ + desc->meta_ctrl |= (ena_tx_ctx->req_id << + ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK; + + desc->meta_ctrl |= (ena_tx_ctx->df << + ENA_ETH_IO_TX_DESC_DF_SHIFT) & + ENA_ETH_IO_TX_DESC_DF_MASK; + + /* Bits 10-15 */ + desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) << + ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK; + + if (ena_tx_ctx->meta_valid) { + desc->meta_ctrl |= (ena_tx_ctx->tso_enable << + ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_TSO_EN_MASK; + desc->meta_ctrl |= ena_tx_ctx->l3_proto & + ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_proto << + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable << + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable << + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial << + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK; + } + + for (i = 0; i < num_bufs; i++) { + /* The first desc share the same desc as the header */ + if (likely(i != 0)) { + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + } + + desc->len_ctrl |= ena_bufs->len & + ENA_ETH_IO_TX_DESC_LENGTH_MASK; + + addr_hi = ((ena_bufs->paddr & + GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + desc->buff_addr_lo = (u32)ena_bufs->paddr; + desc->buff_addr_hi_hdr_sz |= addr_hi & + ENA_ETH_IO_TX_DESC_ADDR_HI_MASK; + ena_bufs++; + } + + /* set the last desc indicator */ + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK; + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + + ena_com_sq_update_tail(io_sq); + + total_desc = max_t(u16, num_bufs, 1); + total_desc += have_meta ? 1 : 0; + + *nb_hw_desc = total_desc; + return 0; +} + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx) +{ + struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0]; + struct ena_eth_io_rx_cdesc_base *cdesc = NULL; + u16 cdesc_idx = 0; + u16 nb_hw_desc; + u16 i; + + WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx); + if (nb_hw_desc == 0) { + ena_rx_ctx->descs = nb_hw_desc; + return 0; + } + + pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, + nb_hw_desc); + + if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) { + pr_err("Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, + ena_rx_ctx->max_bufs); + return -ENOSPC; + } + + for (i = 0; i < nb_hw_desc; i++) { + cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i); + + ena_buf->len = cdesc->length; + ena_buf->req_id = cdesc->req_id; + ena_buf++; + } + + /* Update SQ head ptr */ + io_sq->next_to_comp += nb_hw_desc; + + pr_debug("[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid, + io_sq->next_to_comp); + + /* Get rx flags from the last pkt */ + ena_com_rx_set_flags(ena_rx_ctx, cdesc); + + ena_rx_ctx->descs = nb_hw_desc; + return 0; +} + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id) +{ + struct ena_eth_io_rx_desc *desc; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + if (unlikely(ena_com_sq_empty_space(io_sq) == 0)) + return -ENOSPC; + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc)); + + desc->length = ena_buf->len; + + desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK; + desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; + + desc->req_id = req_id; + + desc->buff_addr_lo = (u32)ena_buf->paddr; + desc->buff_addr_hi = + ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + ena_com_sq_update_tail(io_sq); + + return 0; +} + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) +{ + u8 expected_phase, cdesc_phase; + struct ena_eth_io_tx_cdesc *cdesc; + u16 masked_head; + + masked_head = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_tx_cdesc *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + (masked_head * io_cq->cdesc_entry_size_in_bytes)); + + /* When the current completion descriptor phase isn't the same as the + * expected, it mean that the device still didn't update + * this completion. + */ + cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK; + if (cdesc_phase != expected_phase) + return -EAGAIN; + + ena_com_cq_inc_head(io_cq); + + *req_id = cdesc->req_id; + + return 0; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h new file mode 100644 index 000000000000..bb53c3a4f8e9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_ETH_COM_H_ +#define ENA_ETH_COM_H_ + +#include "ena_com.h" + +/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */ +#define ENA_COMP_HEAD_THRESH 4 + +struct ena_com_tx_ctx { + struct ena_com_tx_meta ena_meta; + struct ena_com_buf *ena_bufs; + /* For LLQ, header buffer - pushed to the device mem space */ + void *push_header; + + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + u16 num_bufs; + u16 req_id; + /* For regular queue, indicate the size of the header + * For LLQ, indicate the size of the pushed buffer + */ + u16 header_len; + + u8 meta_valid; + u8 tso_enable; + u8 l3_csum_enable; + u8 l4_csum_enable; + u8 l4_csum_partial; + u8 df; /* Don't fragment */ +}; + +struct ena_com_rx_ctx { + struct ena_com_rx_buf_info *ena_bufs; + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + bool l3_csum_err; + bool l4_csum_err; + /* fragmented packet */ + bool frag; + u32 hash; + u16 descs; + int max_bufs; +}; + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc); + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx); + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id); + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id); + +static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq, + struct ena_eth_io_intr_reg *intr_reg) +{ + writel(intr_reg->intr_control, io_cq->unmask_reg); +} + +static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) +{ + u16 tail, next_to_comp, cnt; + + next_to_comp = io_sq->next_to_comp; + tail = io_sq->tail; + cnt = tail - next_to_comp; + + return io_sq->q_depth - 1 - cnt; +} + +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) +{ + u16 tail; + + tail = io_sq->tail; + + pr_debug("write submission queue doorbell for queue: %d tail: %d\n", + io_sq->qid, tail); + + writel(tail, io_sq->db_addr); + + return 0; +} + +static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq) +{ + u16 unreported_comp, head; + bool need_update; + + head = io_cq->head; + unreported_comp = head - io_cq->last_head_update; + need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); + + if (io_cq->cq_head_db_reg && need_update) { + pr_debug("Write completion queue doorbell for queue %d: head: %d\n", + io_cq->qid, head); + writel(head, io_cq->cq_head_db_reg); + io_cq->last_head_update = head; + } + + return 0; +} + +static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq, + u8 numa_node) +{ + struct ena_eth_io_numa_node_cfg_reg numa_cfg; + + if (!io_cq->numa_node_cfg_reg) + return; + + numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK) + | ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK; + + writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg); +} + +static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem) +{ + io_sq->next_to_comp += elem; +} + +#endif /* ENA_ETH_COM_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h new file mode 100644 index 000000000000..f320c58793a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h @@ -0,0 +1,416 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ETH_IO_H_ +#define _ENA_ETH_IO_H_ + +enum ena_eth_io_l3_proto_index { + ENA_ETH_IO_L3_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L3_PROTO_IPV4 = 8, + + ENA_ETH_IO_L3_PROTO_IPV6 = 11, + + ENA_ETH_IO_L3_PROTO_FCOE = 21, + + ENA_ETH_IO_L3_PROTO_ROCE = 22, +}; + +enum ena_eth_io_l4_proto_index { + ENA_ETH_IO_L4_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L4_PROTO_TCP = 12, + + ENA_ETH_IO_L4_PROTO_UDP = 13, + + ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23, +}; + +struct ena_eth_io_tx_desc { + /* 15:0 : length - Buffer length in bytes, must + * include any packet trailers that the ENA supposed + * to update like End-to-End CRC, Authentication GMAC + * etc. This length must not include the + * 'Push_Buffer' length. This length must not include + * the 4-byte added in the end for 802.3 Ethernet FCS + * 21:16 : req_id_hi - Request ID[15:10] + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBZ + * 24 : phase + * 25 : reserved1 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 3:0 : l3_proto_idx - L3 protocol. This field + * required when l3_csum_en,l3_csum or tso_en are set. + * 4 : DF - IPv4 DF, must be 0 if packet is IPv4 and + * DF flags of the IPv4 header is 0. Otherwise must + * be set to 1 + * 6:5 : reserved5 + * 7 : tso_en - Enable TSO, For TCP only. + * 12:8 : l4_proto_idx - L4 protocol. This field need + * to be set when l4_csum_en or tso_en are set. + * 13 : l3_csum_en - enable IPv4 header checksum. + * 14 : l4_csum_en - enable TCP/UDP checksum. + * 15 : ethernet_fcs_dis - when set, the controller + * will not append the 802.3 Ethernet Frame Check + * Sequence to the packet + * 16 : reserved16 + * 17 : l4_csum_partial - L4 partial checksum. when + * set to 0, the ENA calculates the L4 checksum, + * where the Destination Address required for the + * TCP/UDP pseudo-header is taken from the actual + * packet L3 header. when set to 1, the ENA doesn't + * calculate the sum of the pseudo-header, instead, + * the checksum field of the L4 is used instead. When + * TSO enabled, the checksum of the pseudo-header + * must not include the tcp length field. L4 partial + * checksum should be used for IPv6 packet that + * contains Routing Headers. + * 20:18 : reserved18 - MBZ + * 21 : reserved21 - MBZ + * 31:22 : req_id_lo - Request ID[9:0] + */ + u32 meta_ctrl; + + u32 buff_addr_lo; + + /* address high and header size + * 15:0 : addr_hi - Buffer Pointer[47:32] + * 23:16 : reserved16_w2 + * 31:24 : header_length - Header length. For Low + * Latency Queues, this fields indicates the number + * of bytes written to the headers' memory. For + * normal queues, if packet is TCP or UDP, and longer + * than max_header_size, then this field should be + * set to the sum of L4 header offset and L4 header + * size(without options), otherwise, this field + * should be set to 0. For both modes, this field + * must not exceed the max_header_size. + * max_header_size value is reported by the Max + * Queues Feature descriptor + */ + u32 buff_addr_hi_hdr_sz; +}; + +struct ena_eth_io_tx_meta_desc { + /* 9:0 : req_id_lo - Request ID[9:0] + * 11:10 : reserved10 - MBZ + * 12 : reserved12 - MBZ + * 13 : reserved13 - MBZ + * 14 : ext_valid - if set, offset fields in Word2 + * are valid Also MSS High in Word 0 and bits [31:24] + * in Word 3 + * 15 : reserved15 + * 19:16 : mss_hi + * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1: + * Extended Metadata Descriptor + * 21 : meta_store - Store extended metadata in queue + * cache + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBO + * 24 : phase + * 25 : reserved25 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 5:0 : req_id_hi + * 31:6 : reserved6 - MBZ + */ + u32 word1; + + /* 7:0 : l3_hdr_len + * 15:8 : l3_hdr_off + * 21:16 : l4_hdr_len_in_words - counts the L4 header + * length in words. there is an explicit assumption + * that L4 header appears right after L3 header and + * L4 offset is based on l3_hdr_off+l3_hdr_len + * 31:22 : mss_lo + */ + u32 word2; + + u32 reserved; +}; + +struct ena_eth_io_tx_cdesc { + /* Request ID[15:0] */ + u16 req_id; + + u8 status; + + /* flags + * 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 sub_qid; + + u16 sq_head_idx; +}; + +struct ena_eth_io_rx_desc { + /* In bytes. 0 means 64KB */ + u16 length; + + /* MBZ */ + u8 reserved2; + + /* 0 : phase + * 1 : reserved1 - MBZ + * 2 : first - Indicates first descriptor in + * transaction + * 3 : last - Indicates last descriptor in transaction + * 4 : comp_req + * 5 : reserved5 - MBO + * 7:6 : reserved6 - MBZ + */ + u8 ctrl; + + u16 req_id; + + /* MBZ */ + u16 reserved6; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + /* MBZ */ + u16 reserved16_w3; +}; + +/* 4-word format Note: all ethernet parsing information are valid only when + * last=1 + */ +struct ena_eth_io_rx_cdesc_base { + /* 4:0 : l3_proto_idx + * 6:5 : src_vlan_cnt + * 7 : reserved7 - MBZ + * 12:8 : l4_proto_idx + * 13 : l3_csum_err - when set, either the L3 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l3_proto_idx indicates IPv4 packet + * 14 : l4_csum_err - when set, either the L4 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l4_proto_idx indicates TCP/UDP packet, and, + * ipv4_frag is not set + * 15 : ipv4_frag - Indicates IPv4 fragmented packet + * 23:16 : reserved16 + * 24 : phase + * 25 : l3_csum2 - second checksum engine result + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 29:28 : reserved28 + * 30 : buffer - 0: Metadata descriptor. 1: Buffer + * Descriptor was used + * 31 : reserved31 + */ + u32 status; + + u16 length; + + u16 req_id; + + /* 32-bit hash result */ + u32 hash; + + u16 sub_qid; + + u16 reserved; +}; + +/* 8-word format */ +struct ena_eth_io_rx_cdesc_ext { + struct ena_eth_io_rx_cdesc_base base; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + u16 reserved16; + + u32 reserved_w6; + + u32 reserved_w7; +}; + +struct ena_eth_io_intr_reg { + /* 14:0 : rx_intr_delay + * 29:15 : tx_intr_delay + * 30 : intr_unmask + * 31 : reserved + */ + u32 intr_control; +}; + +struct ena_eth_io_numa_node_cfg_reg { + /* 7:0 : numa + * 30:8 : reserved + * 31 : enabled + */ + u32 numa_cfg; +}; + +/* tx_desc */ +#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16 +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0) +#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4 +#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4) +#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7 +#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7) +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13 +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14) +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15 +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17) +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22 +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22) +#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24) + +/* tx_meta_desc */ +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0) +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14 +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14) +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16) +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20 +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20) +#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21 +#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21) +#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8 +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8) +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22 +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22) + +/* tx_cdesc */ +#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0) + +/* rx_desc */ +#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0) +#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2 +#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2) +#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3 +#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3) +#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4 +#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4) + +/* rx_cdesc_base */ +#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0) +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5 +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14) +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15 +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15) +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24 +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25) +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26 +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26) +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27 +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27) +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30 +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30) + +/* intr_reg */ +#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0) +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15 +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15) +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30 +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30) + +/* numa_node_cfg_reg */ +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0) +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31 +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31) + +#endif /*_ENA_ETH_IO_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c new file mode 100644 index 000000000000..67b2338f8fb3 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -0,0 +1,895 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "ena_netdev.h" + +struct ena_stats { + char name[ETH_GSTRING_LEN]; + int stat_offset; +}; + +#define ENA_STAT_ENA_COM_ENTRY(stat) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_com_stats_admin, stat) \ +} + +#define ENA_STAT_ENTRY(stat, stat_type) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ +} + +#define ENA_STAT_RX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, rx) + +#define ENA_STAT_TX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, tx) + +#define ENA_STAT_GLOBAL_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, dev) + +static const struct ena_stats ena_stats_global_strings[] = { + ENA_STAT_GLOBAL_ENTRY(tx_timeout), + ENA_STAT_GLOBAL_ENTRY(io_suspend), + ENA_STAT_GLOBAL_ENTRY(io_resume), + ENA_STAT_GLOBAL_ENTRY(wd_expired), + ENA_STAT_GLOBAL_ENTRY(interface_up), + ENA_STAT_GLOBAL_ENTRY(interface_down), + ENA_STAT_GLOBAL_ENTRY(admin_q_pause), +}; + +static const struct ena_stats ena_stats_tx_strings[] = { + ENA_STAT_TX_ENTRY(cnt), + ENA_STAT_TX_ENTRY(bytes), + ENA_STAT_TX_ENTRY(queue_stop), + ENA_STAT_TX_ENTRY(queue_wakeup), + ENA_STAT_TX_ENTRY(dma_mapping_err), + ENA_STAT_TX_ENTRY(linearize), + ENA_STAT_TX_ENTRY(linearize_failed), + ENA_STAT_TX_ENTRY(napi_comp), + ENA_STAT_TX_ENTRY(tx_poll), + ENA_STAT_TX_ENTRY(doorbells), + ENA_STAT_TX_ENTRY(prepare_ctx_err), + ENA_STAT_TX_ENTRY(missing_tx_comp), + ENA_STAT_TX_ENTRY(bad_req_id), +}; + +static const struct ena_stats ena_stats_rx_strings[] = { + ENA_STAT_RX_ENTRY(cnt), + ENA_STAT_RX_ENTRY(bytes), + ENA_STAT_RX_ENTRY(refil_partial), + ENA_STAT_RX_ENTRY(bad_csum), + ENA_STAT_RX_ENTRY(page_alloc_fail), + ENA_STAT_RX_ENTRY(skb_alloc_fail), + ENA_STAT_RX_ENTRY(dma_mapping_err), + ENA_STAT_RX_ENTRY(bad_desc_num), + ENA_STAT_RX_ENTRY(rx_copybreak_pkt), +}; + +static const struct ena_stats ena_stats_ena_com_strings[] = { + ENA_STAT_ENA_COM_ENTRY(aborted_cmd), + ENA_STAT_ENA_COM_ENTRY(submitted_cmd), + ENA_STAT_ENA_COM_ENTRY(completed_cmd), + ENA_STAT_ENA_COM_ENTRY(out_of_space), + ENA_STAT_ENA_COM_ENTRY(no_completion), +}; + +#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) +#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) +#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) +#define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings) + +static void ena_safe_update_stat(u64 *src, u64 *dst, + struct u64_stats_sync *syncp) +{ + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(syncp); + *(dst) = *src; + } while (u64_stats_fetch_retry_irq(syncp, start)); +} + +static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + struct ena_ring *ring; + + u64 *ptr; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + ring = &adapter->tx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->tx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + + /* Rx stats */ + ring = &adapter->rx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->rx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + } +} + +static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + u32 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats + + (uintptr_t)ena_stats->stat_offset); + + *(*data)++ = *ptr; + } +} + +static void ena_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, + u64 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + u64 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + ptr = (u64 *)((uintptr_t)&adapter->dev_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, data++, &adapter->syncp); + } + + ena_queue_stats(adapter, &data); + ena_dev_admin_queue_stats(adapter, &data); +} + +int ena_get_sset_count(struct net_device *netdev, int sset) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + if (sset != ETH_SS_STATS) + return -EOPNOTSUPP; + + return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; +} + +static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) +{ + const struct ena_stats *ena_stats; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_tx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + /* Rx stats */ + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_rx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + } +} + +static void ena_com_dev_strings(u8 **data) +{ + const struct ena_stats *ena_stats; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + snprintf(*data, ETH_GSTRING_LEN, + "ena_admin_q_%s", ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } +} + +static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + int i; + + if (sset != ETH_SS_STATS) + return; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + memcpy(data, ena_stats->name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + ena_queue_strings(adapter, &data); + ena_com_dev_strings(&data); +} + +static int ena_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_admin_get_feature_link_desc *link; + struct ena_admin_get_feat_resp feat_resp; + int rc; + + rc = ena_com_get_link_params(ena_dev, &feat_resp); + if (rc) + return rc; + + link = &feat_resp.u.link; + link_ksettings->base.speed = link->speed; + + if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + } + + link_ksettings->base.autoneg = + (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + + link_ksettings->base.duplex = DUPLEX_FULL; + + return 0; +} + +static int ena_get_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + coalesce->tx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) / + ena_dev->intr_delay_resolution; + if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) { + coalesce->rx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev) + / ena_dev->intr_delay_resolution; + } else { + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval; + } + coalesce->use_adaptive_rx_coalesce = + ena_com_get_adaptive_moderation_enabled(ena_dev); + + return 0; +} + +static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) +{ + unsigned int val; + int i; + + val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev); + + for (i = 0; i < adapter->num_queues; i++) + adapter->tx_ring[i].smoothed_interval = val; +} + +static int ena_set_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + int rc; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + + if (coalesce->rx_coalesce_usecs_irq || + coalesce->rx_max_coalesced_frames_irq || + coalesce->tx_coalesce_usecs_irq || + coalesce->tx_max_coalesced_frames || + coalesce->tx_max_coalesced_frames_irq || + coalesce->stats_block_coalesce_usecs || + coalesce->use_adaptive_tx_coalesce || + coalesce->pkt_rate_low || + coalesce->tx_coalesce_usecs_low || + coalesce->tx_max_coalesced_frames_low || + coalesce->pkt_rate_high || + coalesce->tx_coalesce_usecs_high || + coalesce->tx_max_coalesced_frames_high || + coalesce->rate_sample_interval) + return -EINVAL; + + rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev, + coalesce->tx_coalesce_usecs); + if (rc) + return rc; + + ena_update_tx_rings_intr_moderation(adapter); + + if (ena_com_get_adaptive_moderation_enabled(ena_dev)) { + if (!coalesce->use_adaptive_rx_coalesce) { + ena_com_disable_adaptive_moderation(ena_dev); + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } else { /* was in non-adaptive mode */ + if (coalesce->use_adaptive_rx_coalesce) { + ena_com_enable_adaptive_moderation(ena_dev); + } else { + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + + return 0; +} + +static u32 ena_get_msglevel(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + return adapter->msg_enable; +} + +static void ena_set_msglevel(struct net_device *netdev, u32 value) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + adapter->msg_enable = value; +} + +static void ena_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); +} + +static void ena_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_ring *tx_ring = &adapter->tx_ring[0]; + struct ena_ring *rx_ring = &adapter->rx_ring[0]; + + ring->rx_max_pending = rx_ring->ring_size; + ring->tx_max_pending = tx_ring->ring_size; + ring->rx_pending = rx_ring->ring_size; + ring->tx_pending = tx_ring->ring_size; +} + +static u32 ena_flow_hash_to_flow_type(u16 hash_fields) +{ + u32 data = 0; + + if (hash_fields & ENA_ADMIN_RSS_L2_DA) + data |= RXH_L2DA; + + if (hash_fields & ENA_ADMIN_RSS_L3_DA) + data |= RXH_IP_DST; + + if (hash_fields & ENA_ADMIN_RSS_L3_SA) + data |= RXH_IP_SRC; + + if (hash_fields & ENA_ADMIN_RSS_L4_DP) + data |= RXH_L4_B_2_3; + + if (hash_fields & ENA_ADMIN_RSS_L4_SP) + data |= RXH_L4_B_0_1; + + return data; +} + +static u16 ena_flow_data_to_flow_hash(u32 hash_fields) +{ + u16 data = 0; + + if (hash_fields & RXH_L2DA) + data |= ENA_ADMIN_RSS_L2_DA; + + if (hash_fields & RXH_IP_DST) + data |= ENA_ADMIN_RSS_L3_DA; + + if (hash_fields & RXH_IP_SRC) + data |= ENA_ADMIN_RSS_L3_SA; + + if (hash_fields & RXH_L4_B_2_3) + data |= ENA_ADMIN_RSS_L4_DP; + + if (hash_fields & RXH_L4_B_0_1) + data |= ENA_ADMIN_RSS_L4_SP; + + return data; +} + +static int ena_get_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + int rc; + + cmd->data = 0; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields); + if (rc) { + /* If device don't have permission, return unsupported */ + if (rc == -EPERM) + rc = -EOPNOTSUPP; + return rc; + } + + cmd->data = ena_flow_hash_to_flow_type(hash_fields); + + return 0; +} + +static int ena_set_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + hash_fields = ena_flow_data_to_flow_hash(cmd->data); + + return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields); +} + +static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_SRXFH: + rc = ena_set_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info, + u32 *rules) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = adapter->num_queues; + rc = 0; + break; + case ETHTOOL_GRXFH: + rc = ena_get_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static u32 ena_get_rxfh_indir_size(struct net_device *netdev) +{ + return ENA_RX_RSS_TABLE_SIZE; +} + +static u32 ena_get_rxfh_key_size(struct net_device *netdev) +{ + return ENA_HASH_KEY_SIZE; +} + +static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + enum ena_admin_hash_functions ena_func; + u8 func; + int rc; + + rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + if (rc) + return rc; + + rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + if (rc) + return rc; + + switch (ena_func) { + case ENA_ADMIN_TOEPLITZ: + func = ETH_RSS_HASH_TOP; + break; + case ENA_ADMIN_CRC32: + func = ETH_RSS_HASH_XOR; + break; + default: + netif_err(adapter, drv, netdev, + "Command parameter is not supported\n"); + return -EOPNOTSUPP; + } + + if (hfunc) + *hfunc = func; + + return rc; +} + +static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + enum ena_admin_hash_functions func; + int rc, i; + + if (indir) { + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + rc = ena_com_indirect_table_fill_entry(ena_dev, + ENA_IO_RXQ_IDX(indir[i]), + i); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, + "Cannot fill indirect table (index is too large)\n"); + return rc; + } + } + + rc = ena_com_indirect_table_set(ena_dev); + if (rc) { + netif_err(adapter, drv, netdev, + "Cannot set indirect table\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + switch (hfunc) { + case ETH_RSS_HASH_TOP: + func = ENA_ADMIN_TOEPLITZ; + break; + case ETH_RSS_HASH_XOR: + func = ENA_ADMIN_CRC32; + break; + default: + netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n", + hfunc); + return -EOPNOTSUPP; + } + + if (key) { + rc = ena_com_fill_hash_function(ena_dev, func, key, + ENA_HASH_KEY_SIZE, + 0xFFFFFFFF); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, "Cannot fill key\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + return 0; +} + +static void ena_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + channels->max_rx = ENA_MAX_NUM_IO_QUEUES; + channels->max_tx = ENA_MAX_NUM_IO_QUEUES; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = adapter->num_queues; + channels->tx_count = adapter->num_queues; + channels->other_count = 0; + channels->combined_count = 0; +} + +static int ena_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = adapter->rx_copybreak; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ena_set_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + u32 len; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + len = *(u32 *)data; + if (len > adapter->netdev->mtu) { + ret = -EINVAL; + break; + } + adapter->rx_copybreak = len; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct ethtool_ops ena_ethtool_ops = { + .get_link_ksettings = ena_get_link_ksettings, + .get_drvinfo = ena_get_drvinfo, + .get_msglevel = ena_get_msglevel, + .set_msglevel = ena_set_msglevel, + .get_link = ethtool_op_get_link, + .get_coalesce = ena_get_coalesce, + .set_coalesce = ena_set_coalesce, + .get_ringparam = ena_get_ringparam, + .get_sset_count = ena_get_sset_count, + .get_strings = ena_get_strings, + .get_ethtool_stats = ena_get_ethtool_stats, + .get_rxnfc = ena_get_rxnfc, + .set_rxnfc = ena_set_rxnfc, + .get_rxfh_indir_size = ena_get_rxfh_indir_size, + .get_rxfh_key_size = ena_get_rxfh_key_size, + .get_rxfh = ena_get_rxfh, + .set_rxfh = ena_set_rxfh, + .get_channels = ena_get_channels, + .get_tunable = ena_get_tunable, + .set_tunable = ena_set_tunable, +}; + +void ena_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ena_ethtool_ops; +} + +static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf) +{ + struct net_device *netdev = adapter->netdev; + u8 *strings_buf; + u64 *data_buf; + int strings_num; + int i, rc; + + strings_num = ena_get_sset_count(netdev, ETH_SS_STATS); + if (strings_num <= 0) { + netif_err(adapter, drv, netdev, "Can't get stats num\n"); + return; + } + + strings_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * ETH_GSTRING_LEN, + GFP_ATOMIC); + if (!strings_buf) { + netif_err(adapter, drv, netdev, + "failed to alloc strings_buf\n"); + return; + } + + data_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * sizeof(u64), + GFP_ATOMIC); + if (!data_buf) { + netif_err(adapter, drv, netdev, + "failed to allocate data buf\n"); + devm_kfree(&adapter->pdev->dev, strings_buf); + return; + } + + ena_get_strings(netdev, ETH_SS_STATS, strings_buf); + ena_get_ethtool_stats(netdev, NULL, data_buf); + + /* If there is a buffer, dump stats, otherwise print them to dmesg */ + if (buf) + for (i = 0; i < strings_num; i++) { + rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64), + "%s %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + buf += rc; + } + else + for (i = 0; i < strings_num; i++) + netif_err(adapter, drv, netdev, "%s: %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + + devm_kfree(&adapter->pdev->dev, strings_buf); + devm_kfree(&adapter->pdev->dev, data_buf); +} + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf) +{ + if (!buf) + return; + + ena_dump_stats_ex(adapter, buf); +} + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter) +{ + ena_dump_stats_ex(adapter, NULL); +} diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c new file mode 100644 index 000000000000..94790df2d559 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -0,0 +1,3280 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifdef CONFIG_RFS_ACCEL +#include +#endif /* CONFIG_RFS_ACCEL */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_netdev.h" +#include "ena_pci_id_tbl.h" + +static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; + +MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); +MODULE_DESCRIPTION(DEVICE_NAME); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (5 * HZ) + +#define ENA_NAPI_BUDGET 64 + +#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ + NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) +static int debug = -1; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static struct ena_aenq_handlers aenq_handlers; + +static struct workqueue_struct *ena_wq; + +MODULE_DEVICE_TABLE(pci, ena_pci_tbl); + +static int ena_rss_init_default(struct ena_adapter *adapter); + +static void ena_tx_timeout(struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.tx_timeout++; + u64_stats_update_end(&adapter->syncp); + + netif_err(adapter, tx_err, dev, "Transmit time out\n"); + + /* Change the state of the device to trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +} + +static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + adapter->rx_ring[i].mtu = mtu; +} + +static int ena_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int ret; + + if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { + netif_err(adapter, drv, dev, + "Invalid MTU setting. new_mtu: %d\n", new_mtu); + + return -EINVAL; + } + + ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); + if (!ret) { + netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); + update_rx_ring_mtu(adapter, new_mtu); + dev->mtu = new_mtu; + } else { + netif_err(adapter, drv, dev, "Failed to set MTU to %d\n", + new_mtu); + } + + return ret; +} + +static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) +{ +#ifdef CONFIG_RFS_ACCEL + u32 i; + int rc; + + adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues); + if (!adapter->netdev->rx_cpu_rmap) + return -ENOMEM; + for (i = 0; i < adapter->num_queues; i++) { + int irq_idx = ENA_IO_IRQ_IDX(i); + + rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, + adapter->msix_entries[irq_idx].vector); + if (rc) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + return rc; + } + } +#endif /* CONFIG_RFS_ACCEL */ + return 0; +} + +static void ena_init_io_rings_common(struct ena_adapter *adapter, + struct ena_ring *ring, u16 qid) +{ + ring->qid = qid; + ring->pdev = adapter->pdev; + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + ring->napi = &adapter->ena_napi[qid].napi; + ring->adapter = adapter; + ring->ena_dev = adapter->ena_dev; + ring->per_napi_packets = 0; + ring->per_napi_bytes = 0; + ring->cpu = 0; + u64_stats_init(&ring->syncp); +} + +static void ena_init_io_rings(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev; + struct ena_ring *txr, *rxr; + int i; + + ena_dev = adapter->ena_dev; + + for (i = 0; i < adapter->num_queues; i++) { + txr = &adapter->tx_ring[i]; + rxr = &adapter->rx_ring[i]; + + /* TX/RX common ring state */ + ena_init_io_rings_common(adapter, txr, i); + ena_init_io_rings_common(adapter, rxr, i); + + /* TX specific ring state */ + txr->ring_size = adapter->tx_ring_size; + txr->tx_max_header_size = ena_dev->tx_max_header_size; + txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; + txr->sgl_size = adapter->max_tx_sgl_size; + txr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); + + /* RX specific ring state */ + rxr->ring_size = adapter->rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + } +} + +/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Return 0 on success, negative on failure + */ +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, i, node; + + if (tx_ring->tx_buffer_info) { + netif_err(adapter, ifup, + adapter->netdev, "tx_buffer_info info is not NULL"); + return -EEXIST; + } + + size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; + node = cpu_to_node(ena_irq->cpu); + + tx_ring->tx_buffer_info = vzalloc_node(size, node); + if (!tx_ring->tx_buffer_info) { + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + return -ENOMEM; + } + + size = sizeof(u16) * tx_ring->ring_size; + tx_ring->free_tx_ids = vzalloc_node(size, node); + if (!tx_ring->free_tx_ids) { + tx_ring->free_tx_ids = vzalloc(size); + if (!tx_ring->free_tx_ids) { + vfree(tx_ring->tx_buffer_info); + return -ENOMEM; + } + } + + /* Req id ring for TX out of order completions */ + for (i = 0; i < tx_ring->ring_size; i++) + tx_ring->free_tx_ids[i] = i; + + /* Reset tx statistics */ + memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + tx_ring->cpu = ena_irq->cpu; + return 0; +} + +/* ena_free_tx_resources - Free I/O Tx Resources per Queue + * @adapter: network interface device structure + * @qid: queue index + * + * Free all transmit software resources + */ +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + vfree(tx_ring->free_tx_ids); + tx_ring->free_tx_ids = NULL; +} + +/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues + * @adapter: private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_tx_resources(adapter, i); + if (rc) + goto err_setup_tx; + } + + return 0; + +err_setup_tx: + + netif_err(adapter, ifup, adapter->netdev, + "Tx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_tx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_tx_resources(adapter, i); +} + +/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Returns 0 on success, negative on failure + */ +static int ena_setup_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, node; + + if (rx_ring->rx_buffer_info) { + netif_err(adapter, ifup, adapter->netdev, + "rx_buffer_info is not NULL"); + return -EEXIST; + } + + /* alloc extra element so in rx path + * we can always prefetch rx_info + 1 + */ + size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1); + node = cpu_to_node(ena_irq->cpu); + + rx_ring->rx_buffer_info = vzalloc_node(size, node); + if (!rx_ring->rx_buffer_info) { + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + return -ENOMEM; + } + + /* Reset rx statistics */ + memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); + + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + rx_ring->cpu = ena_irq->cpu; + + return 0; +} + +/* ena_free_rx_resources - Free I/O Rx Resources + * @adapter: network interface device structure + * @qid: queue index + * + * Free all receive software resources + */ +static void ena_free_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; +} + +/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_rx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_rx_resources(adapter, i); + if (rc) + goto err_setup_rx; + } + + return 0; + +err_setup_rx: + + netif_err(adapter, ifup, adapter->netdev, + "Rx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_rx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + */ +static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_resources(adapter, i); +} + +static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, gfp_t gfp) +{ + struct ena_com_buf *ena_buf; + struct page *page; + dma_addr_t dma; + + /* if previous allocated page is not used */ + if (unlikely(rx_info->page)) + return 0; + + page = alloc_page(gfp); + if (unlikely(!page)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.page_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return -ENOMEM; + } + + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.dma_mapping_err++; + u64_stats_update_end(&rx_ring->syncp); + + __free_page(page); + return -EIO; + } + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "alloc page %p, rx_info %p\n", page, rx_info); + + rx_info->page = page; + rx_info->page_offset = 0; + ena_buf = &rx_info->ena_buf; + ena_buf->paddr = dma; + ena_buf->len = PAGE_SIZE; + + return 0; +} + +static void ena_free_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info) +{ + struct page *page = rx_info->page; + struct ena_com_buf *ena_buf = &rx_info->ena_buf; + + if (unlikely(!page)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "Trying to free unallocated buffer\n"); + return; + } + + dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE, + DMA_FROM_DEVICE); + + __free_page(page); + rx_info->page = NULL; +} + +static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) +{ + u16 next_to_use; + u32 i; + int rc; + + next_to_use = rx_ring->next_to_use; + + for (i = 0; i < num; i++) { + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[next_to_use]; + + rc = ena_alloc_rx_page(rx_ring, rx_info, + __GFP_COLD | GFP_ATOMIC | __GFP_COMP); + if (unlikely(rc < 0)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "failed to alloc buffer for rx queue %d\n", + rx_ring->qid); + break; + } + rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, + &rx_info->ena_buf, + next_to_use); + if (unlikely(rc)) { + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "failed to add buffer for rx queue %d\n", + rx_ring->qid); + break; + } + next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, + rx_ring->ring_size); + } + + if (unlikely(i < num)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.refil_partial++; + u64_stats_update_end(&rx_ring->syncp); + netdev_warn(rx_ring->netdev, + "refilled rx qid %d with only %d buffers (from %d)\n", + rx_ring->qid, i, num); + } + + if (likely(i)) { + /* Add memory barrier to make sure the desc were written before + * issue a doorbell + */ + wmb(); + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); + } + + rx_ring->next_to_use = next_to_use; + + return i; +} + +static void ena_free_rx_bufs(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + u32 i; + + for (i = 0; i < rx_ring->ring_size; i++) { + struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; + + if (rx_info->page) + ena_free_rx_page(rx_ring, rx_info); + } +} + +/* ena_refill_all_rx_bufs - allocate all queues Rx buffers + * @adapter: board private structure + * + */ +static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, rc, bufs_num; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + bufs_num = rx_ring->ring_size - 1; + rc = ena_refill_rx_bufs(rx_ring, bufs_num); + + if (unlikely(rc != bufs_num)) + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "refilling Queue %d failed. allocated %d buffers from: %d\n", + i, rc, bufs_num); + } +} + +static void ena_free_all_rx_bufs(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_bufs(adapter, i); +} + +/* ena_free_tx_bufs - Free Tx Buffers per Queue + * @tx_ring: TX ring for which buffers be freed + */ +static void ena_free_tx_bufs(struct ena_ring *tx_ring) +{ + u32 i; + + for (i = 0; i < tx_ring->ring_size; i++) { + struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; + struct ena_com_buf *ena_buf; + int nr_frags; + int j; + + if (!tx_info->skb) + continue; + + netdev_notice(tx_ring->netdev, + "free uncompleted tx skb qid %d idx 0x%x\n", + tx_ring->qid, i); + + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (j = 0; j < nr_frags; j++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + } + + dev_kfree_skb_any(tx_info->skb); + } + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); +} + +static void ena_free_all_tx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *tx_ring; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + ena_free_tx_bufs(tx_ring); + } +} + +static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_TXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_RXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_io_queues(struct ena_adapter *adapter) +{ + ena_destroy_all_tx_queues(adapter); + ena_destroy_all_rx_queues(adapter); +} + +static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; + + if (likely(req_id < tx_ring->ring_size)) { + tx_info = &tx_ring->tx_buffer_info[req_id]; + if (likely(tx_info->skb)) + return 0; + } + + if (tx_info) + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_info doesn't have valid skb\n"); + else + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "Invalid req_id: %hu\n", req_id); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.bad_req_id++; + u64_stats_update_end(&tx_ring->syncp); + + /* Trigger device reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); + return -EFAULT; +} + +static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) +{ + struct netdev_queue *txq; + bool above_thresh; + u32 tx_bytes = 0; + u32 total_done = 0; + u16 next_to_clean; + u16 req_id; + int tx_pkts = 0; + int rc; + + next_to_clean = tx_ring->next_to_clean; + txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid); + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct sk_buff *skb; + struct ena_com_buf *ena_buf; + int i, nr_frags; + + rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_tx_req_id(tx_ring, req_id); + if (rc) + break; + + tx_info = &tx_ring->tx_buffer_info[req_id]; + skb = tx_info->skb; + + /* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */ + prefetch(&skb->end); + + tx_info->skb = NULL; + tx_info->last_jiffies = 0; + + if (likely(tx_info->num_of_bufs != 0)) { + ena_buf = tx_info->bufs; + + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (i = 0; i < nr_frags; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + } + } + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d skb %p completed\n", tx_ring->qid, + skb); + + tx_bytes += skb->len; + dev_kfree_skb(skb); + tx_pkts++; + total_done += tx_info->tx_descs; + + tx_ring->free_tx_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + tx_ring->ring_size); + } + + tx_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); + + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + tx_ring->qid, tx_pkts); + + /* need to make the rings circular update visible to + * ena_start_xmit() before checking for netif_queue_stopped(). + */ + smp_mb(); + + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { + __netif_tx_lock(txq, smp_processor_id()); + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (netif_tx_queue_stopped(txq) && above_thresh) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + __netif_tx_unlock(txq); + } + + tx_ring->per_napi_bytes += tx_bytes; + tx_ring->per_napi_packets += tx_pkts; + + return tx_pkts; +} + +static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + struct ena_com_rx_buf_info *ena_bufs, + u32 descs, + u16 *next_to_clean) +{ + struct sk_buff *skb; + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[*next_to_clean]; + u32 len; + u32 buf = 0; + void *va; + + len = ena_bufs[0].len; + if (unlikely(!rx_info->page)) { + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Page is NULL\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_info %p page %p\n", + rx_info, rx_info->page); + + /* save virt address of first buffer */ + va = page_address(rx_info->page) + rx_info->page_offset; + prefetch(va + NET_IP_ALIGN); + + if (len <= rx_ring->rx_copybreak) { + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_copybreak); + if (unlikely(!skb)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Failed to allocate skb\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx allocated small packet. len %d. data_len %d\n", + skb->len, skb->data_len); + + /* sync this buffer for CPU use */ + dma_sync_single_for_cpu(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, va, len); + dma_sync_single_for_device(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, + rx_ring->ring_size); + return skb; + } + + skb = napi_get_frags(rx_ring->napi); + if (unlikely(!skb)) { + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "Failed allocating skb\n"); + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return NULL; + } + + do { + dma_unmap_page(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + PAGE_SIZE, DMA_FROM_DEVICE); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, + rx_info->page_offset, len, PAGE_SIZE); + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx skb updated. len %d. data_len %d\n", + skb->len, skb->data_len); + + rx_info->page = NULL; + *next_to_clean = + ENA_RX_RING_IDX_NEXT(*next_to_clean, + rx_ring->ring_size); + if (likely(--descs == 0)) + break; + rx_info = &rx_ring->rx_buffer_info[*next_to_clean]; + len = ena_bufs[++buf].len; + } while (1); + + return skb; +} + +/* ena_rx_checksum - indicate in skb if hw indicated a good cksum + * @adapter: structure containing adapter specific data + * @ena_rx_ctx: received packet context/metadata + * @skb: skb currently being received and modified + */ +static inline void ena_rx_checksum(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + /* Rx csum disabled */ + if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* For fragmented packets the checksum isn't valid */ + if (ena_rx_ctx->frag) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* if IP and error */ + if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && + (ena_rx_ctx->l3_csum_err))) { + /* ipv4 checksum error */ + skb->ip_summed = CHECKSUM_NONE; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX IPv4 header checksum error\n"); + return; + } + + /* if TCP/UDP */ + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) { + if (unlikely(ena_rx_ctx->l4_csum_err)) { + /* TCP/UDP checksum error */ + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX L4 checksum error\n"); + skb->ip_summed = CHECKSUM_NONE; + return; + } + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} + +static void ena_set_rx_hash(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + enum pkt_hash_types hash_type; + + if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) { + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) + + hash_type = PKT_HASH_TYPE_L4; + else + hash_type = PKT_HASH_TYPE_NONE; + + /* Override hash type if the packet is fragmented */ + if (ena_rx_ctx->frag) + hash_type = PKT_HASH_TYPE_NONE; + + skb_set_hash(skb, ena_rx_ctx->hash, hash_type); + } +} + +/* ena_clean_rx_irq - Cleanup RX irq + * @rx_ring: RX ring to clean + * @napi: napi handler + * @budget: how many packets driver is allowed to clean + * + * Returns the number of cleaned buffers. + */ +static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + u32 budget) +{ + u16 next_to_clean = rx_ring->next_to_clean; + u32 res_budget, work_done; + + struct ena_com_rx_ctx ena_rx_ctx; + struct ena_adapter *adapter; + struct sk_buff *skb; + int refill_required; + int refill_threshold; + int rc = 0; + int total_len = 0; + int rx_copybreak_pkt = 0; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "%s qid %d\n", __func__, rx_ring->qid); + res_budget = budget; + + do { + ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; + ena_rx_ctx.max_bufs = rx_ring->sgl_size; + ena_rx_ctx.descs = 0; + rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, + rx_ring->ena_com_io_sq, + &ena_rx_ctx); + if (unlikely(rc)) + goto error; + + if (unlikely(ena_rx_ctx.descs == 0)) + break; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", + rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, + ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + + /* allocate skb and fill it */ + skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, + &next_to_clean); + + /* exit if we failed to retrieve a buffer */ + if (unlikely(!skb)) { + next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean, + ena_rx_ctx.descs, + rx_ring->ring_size); + break; + } + + ena_rx_checksum(rx_ring, &ena_rx_ctx, skb); + + ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb); + + skb_record_rx_queue(skb, rx_ring->qid); + + if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { + total_len += rx_ring->ena_bufs[0].len; + rx_copybreak_pkt++; + napi_gro_receive(napi, skb); + } else { + total_len += skb->len; + napi_gro_frags(napi); + } + + res_budget--; + } while (likely(res_budget)); + + work_done = budget - res_budget; + rx_ring->per_napi_bytes += total_len; + rx_ring->per_napi_packets += work_done; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bytes += total_len; + rx_ring->rx_stats.cnt += work_done; + rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt; + u64_stats_update_end(&rx_ring->syncp); + + rx_ring->next_to_clean = next_to_clean; + + refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER; + + /* Optimization, try to batch new rx buffers */ + if (refill_required > refill_threshold) { + ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); + ena_refill_rx_bufs(rx_ring, refill_required); + } + + return work_done; + +error: + adapter = netdev_priv(rx_ring->netdev); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_desc_num++; + u64_stats_update_end(&rx_ring->syncp); + + /* Too many desc from the device. Trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + + return 0; +} + +inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, + struct ena_ring *tx_ring) +{ + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + ena_com_calculate_interrupt_delay(rx_ring->ena_dev, + rx_ring->per_napi_packets, + rx_ring->per_napi_bytes, + &rx_ring->smoothed_interval, + &rx_ring->moder_tbl_idx); + + /* Reset per napi packets/bytes */ + tx_ring->per_napi_packets = 0; + tx_ring->per_napi_bytes = 0; + rx_ring->per_napi_packets = 0; + rx_ring->per_napi_bytes = 0; +} + +static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) +{ + int cpu = get_cpu(); + int numa_node; + + /* Check only one ring since the 2 rings are running on the same cpu */ + if (likely(tx_ring->cpu == cpu)) + goto out; + + numa_node = cpu_to_node(cpu); + put_cpu(); + + if (numa_node != NUMA_NO_NODE) { + ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); + ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + } + + tx_ring->cpu = cpu; + rx_ring->cpu = cpu; + + return; +out: + put_cpu(); +} + +static int ena_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + struct ena_ring *tx_ring, *rx_ring; + struct ena_eth_io_intr_reg intr_reg; + + u32 tx_work_done; + u32 rx_work_done; + int tx_budget; + int napi_comp_call = 0; + int ret; + + tx_ring = ena_napi->tx_ring; + rx_ring = ena_napi->rx_ring; + + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; + + if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); + rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + + if ((budget > rx_work_done) && (tx_budget > tx_work_done)) { + napi_complete_done(napi, rx_work_done); + + napi_comp_call = 1; + /* Tx and Rx share the same interrupt vector */ + if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) + ena_adjust_intr_moderation(rx_ring, tx_ring); + + /* Update intr register: rx intr delay, tx intr delay and + * interrupt unmask + */ + ena_com_update_intr_reg(&intr_reg, + rx_ring->smoothed_interval, + tx_ring->smoothed_interval, + true); + + /* It is a shared MSI-X. Tx and Rx CQ have pointer to it. + * So we use one of them to reach the intr reg + */ + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + + ena_update_ring_numa_node(tx_ring, rx_ring); + + ret = rx_work_done; + } else { + ret = budget; + } + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.napi_comp += napi_comp_call; + tx_ring->tx_stats.tx_poll++; + u64_stats_update_end(&tx_ring->syncp); + + return ret; +} + +static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + ena_com_admin_q_comp_intr_handler(adapter->ena_dev); + + /* Don't call the aenq handler before probe is done */ + if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))) + ena_com_aenq_intr_handler(adapter->ena_dev, data); + + return IRQ_HANDLED; +} + +/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx + * @irq: interrupt number + * @data: pointer to a network interface private napi device structure + */ +static irqreturn_t ena_intr_msix_io(int irq, void *data) +{ + struct ena_napi *ena_napi = data; + + napi_schedule(&ena_napi->napi); + + return IRQ_HANDLED; +} + +static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) +{ + int i, msix_vecs, rc; + + if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, probe, adapter->netdev, + "Error, MSI-X is already enabled\n"); + return -EPERM; + } + + /* Reserved the max msix vectors we might need */ + msix_vecs = ENA_MAX_MSIX_VEC(num_queues); + + netif_dbg(adapter, probe, adapter->netdev, + "trying to enable MSI-X, vectors %d\n", msix_vecs); + + adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry)); + + if (!adapter->msix_entries) + return -ENOMEM; + + for (i = 0; i < msix_vecs; i++) + adapter->msix_entries[i].entry = i; + + rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs); + if (rc != 0) { + netif_err(adapter, probe, adapter->netdev, + "Failed to enable MSI-X, vectors %d rc %d\n", + msix_vecs, rc); + return -ENOSPC; + } + + netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n", + msix_vecs); + + if (msix_vecs >= 1) { + if (ena_init_rx_cpu_rmap(adapter)) + netif_warn(adapter, probe, adapter->netdev, + "Failed to map IRQs to CPUs\n"); + } + + adapter->msix_vecs = msix_vecs; + set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags); + + return 0; +} + +static void ena_setup_mgmnt_intr(struct ena_adapter *adapter) +{ + u32 cpu; + + snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, + ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s", + pci_name(adapter->pdev)); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = + ena_intr_msix_mgmnt; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = + adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector; + cpu = cpumask_first(cpu_online_mask); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu; + cpumask_set_cpu(cpu, + &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask); +} + +static void ena_setup_io_intr(struct ena_adapter *adapter) +{ + struct net_device *netdev; + int irq_idx, i, cpu; + + netdev = adapter->netdev; + + for (i = 0; i < adapter->num_queues; i++) { + irq_idx = ENA_IO_IRQ_IDX(i); + cpu = i % num_online_cpus(); + + snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, + "%s-Tx-Rx-%d", netdev->name, i); + adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io; + adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i]; + adapter->irq_tbl[irq_idx].vector = + adapter->msix_entries[irq_idx].vector; + adapter->irq_tbl[irq_idx].cpu = cpu; + + cpumask_set_cpu(cpu, + &adapter->irq_tbl[irq_idx].affinity_hint_mask); + } +} + +static int ena_request_mgmnt_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, probe, adapter->netdev, + "failed to request admin irq\n"); + return rc; + } + + netif_dbg(adapter, probe, adapter->netdev, + "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", + irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + + return rc; +} + +static int ena_request_io_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc = 0, i, k; + + if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ: MSI-X is not enabled\n"); + return -EINVAL; + } + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ. index %d rc %d\n", + i, rc); + goto err; + } + + netif_dbg(adapter, ifup, adapter->netdev, + "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", + i, irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + } + + return rc; + +err: + for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) { + irq = &adapter->irq_tbl[k]; + free_irq(irq->vector, irq->data); + } + + return rc; +} + +static void ena_free_mgmnt_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + synchronize_irq(irq->vector); + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); +} + +static void ena_free_io_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + int i; + +#ifdef CONFIG_RFS_ACCEL + if (adapter->msix_vecs >= 1) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); + } +} + +static void ena_disable_msix(struct ena_adapter *adapter) +{ + if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) + pci_disable_msix(adapter->pdev); + + if (adapter->msix_entries) + vfree(adapter->msix_entries); + adapter->msix_entries = NULL; +} + +static void ena_disable_io_intr_sync(struct ena_adapter *adapter) +{ + int i; + + if (!netif_running(adapter->netdev)) + return; + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) + synchronize_irq(adapter->irq_tbl[i].vector); +} + +static void ena_del_napi(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + netif_napi_del(&adapter->ena_napi[i].napi); +} + +static void ena_init_napi(struct ena_adapter *adapter) +{ + struct ena_napi *napi; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + napi = &adapter->ena_napi[i]; + + netif_napi_add(adapter->netdev, + &adapter->ena_napi[i].napi, + ena_io_poll, + ENA_NAPI_BUDGET); + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + napi->qid = i; + } +} + +static void ena_napi_disable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_disable(&adapter->ena_napi[i].napi); +} + +static void ena_napi_enable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_enable(&adapter->ena_napi[i].napi); +} + +static void ena_restore_ethtool_params(struct ena_adapter *adapter) +{ + adapter->tx_usecs = 0; + adapter->rx_usecs = 0; + adapter->tx_frames = 1; + adapter->rx_frames = 1; +} + +/* Configure the Rx forwarding */ +static int ena_rss_configure(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc; + + /* In case the RSS table wasn't initialized by probe */ + if (!ena_dev->rss.tbl_log_size) { + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to init RSS rc: %d\n", rc); + return rc; + } + } + + /* Set indirect table */ + rc = ena_com_indirect_table_set(ena_dev); + if (unlikely(rc && rc != -EPERM)) + return rc; + + /* Configure hash function (if supported) */ + rc = ena_com_set_hash_function(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + /* Configure hash inputs (if supported) */ + rc = ena_com_set_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + return 0; +} + +static int ena_up_complete(struct ena_adapter *adapter) +{ + int rc, i; + + rc = ena_rss_configure(adapter); + if (rc) + return rc; + + ena_init_napi(adapter); + + ena_change_mtu(adapter->netdev, adapter->netdev->mtu); + + ena_refill_all_rx_bufs(adapter); + + /* enable transmits */ + netif_tx_start_all_queues(adapter->netdev); + + ena_restore_ethtool_params(adapter); + + ena_napi_enable_all(adapter); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + + return 0; +} + +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_com_dev *ena_dev; + struct ena_ring *tx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + tx_ring = &adapter->tx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_TXQ_IDX(qid); + + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; + ctx.qid = ena_qid; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->tx_ring_size; + ctx.numa_node = cpu_to_node(tx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O TX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &tx_ring->ena_com_io_sq, + &tx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get TX queue handlers. TX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); + return rc; +} + +static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_tx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); + + return rc; +} + +static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_dev *ena_dev; + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_ring *rx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + rx_ring = &adapter->rx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_RXQ_IDX(qid); + + ctx.qid = ena_qid; + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->rx_ring_size; + ctx.numa_node = cpu_to_node(rx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O RX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &rx_ring->ena_com_io_sq, + &rx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get RX queue handlers. RX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); + + return rc; +} + +static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_rx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); + + return rc; +} + +static int ena_up(struct ena_adapter *adapter) +{ + int rc; + + netdev_dbg(adapter->netdev, "%s\n", __func__); + + ena_setup_io_intr(adapter); + + rc = ena_request_io_irq(adapter); + if (rc) + goto err_req_irq; + + /* allocate transmit descriptors */ + rc = ena_setup_all_tx_resources(adapter); + if (rc) + goto err_setup_tx; + + /* allocate receive descriptors */ + rc = ena_setup_all_rx_resources(adapter); + if (rc) + goto err_setup_rx; + + /* Create TX queues */ + rc = ena_create_all_io_tx_queues(adapter); + if (rc) + goto err_create_tx_queues; + + /* Create RX queues */ + rc = ena_create_all_io_rx_queues(adapter); + if (rc) + goto err_create_rx_queues; + + rc = ena_up_complete(adapter); + if (rc) + goto err_up; + + if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) + netif_carrier_on(adapter->netdev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_up++; + u64_stats_update_end(&adapter->syncp); + + set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + return rc; + +err_up: + ena_destroy_all_rx_queues(adapter); +err_create_rx_queues: + ena_destroy_all_tx_queues(adapter); +err_create_tx_queues: + ena_free_all_io_rx_resources(adapter); +err_setup_rx: + ena_free_all_io_tx_resources(adapter); +err_setup_tx: + ena_free_io_irq(adapter); +err_req_irq: + + return rc; +} + +static void ena_down(struct ena_adapter *adapter) +{ + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); + + clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_down++; + u64_stats_update_end(&adapter->syncp); + + /* After this point the napi handler won't enable the tx queue */ + ena_napi_disable_all(adapter); + netif_carrier_off(adapter->netdev); + netif_tx_disable(adapter->netdev); + + /* After destroy the queue there won't be any new interrupts */ + ena_destroy_all_io_queues(adapter); + + ena_disable_io_intr_sync(adapter); + ena_free_io_irq(adapter); + ena_del_napi(adapter); + + ena_free_all_tx_bufs(adapter); + ena_free_all_rx_bufs(adapter); + ena_free_all_io_tx_resources(adapter); + ena_free_all_io_rx_resources(adapter); +} + +/* ena_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + */ +static int ena_open(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc; + + /* Notify the stack of the actual queue counts. */ + rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); + return rc; + } + + rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); + return rc; + } + + rc = ena_up(adapter); + if (rc) + return rc; + + return rc; +} + +/* ena_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the drivers control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + */ +static int ena_close(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + netif_dbg(adapter, ifdown, netdev, "%s\n", __func__); + + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); + + return 0; +} + +static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) +{ + u32 mss = skb_shinfo(skb)->gso_size; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + u8 l4_protocol = 0; + + if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) { + ena_tx_ctx->l4_csum_enable = 1; + if (mss) { + ena_tx_ctx->tso_enable = 1; + ena_meta->l4_hdr_len = tcp_hdr(skb)->doff; + ena_tx_ctx->l4_csum_partial = 0; + } else { + ena_tx_ctx->tso_enable = 0; + ena_meta->l4_hdr_len = 0; + ena_tx_ctx->l4_csum_partial = 1; + } + + switch (ip_hdr(skb)->version) { + case IPVERSION: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; + if (ip_hdr(skb)->frag_off & htons(IP_DF)) + ena_tx_ctx->df = 1; + if (mss) + ena_tx_ctx->l3_csum_enable = 1; + l4_protocol = ip_hdr(skb)->protocol; + break; + case 6: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; + l4_protocol = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + + if (l4_protocol == IPPROTO_TCP) + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; + else + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; + + ena_meta->mss = mss; + ena_meta->l3_hdr_len = skb_network_header_len(skb); + ena_meta->l3_hdr_offset = skb_network_offset(skb); + ena_tx_ctx->meta_valid = 1; + + } else { + ena_tx_ctx->meta_valid = 0; + } +} + +static int ena_check_and_linearize_skb(struct ena_ring *tx_ring, + struct sk_buff *skb) +{ + int num_frags, header_len, rc; + + num_frags = skb_shinfo(skb)->nr_frags; + header_len = skb_headlen(skb); + + if (num_frags < tx_ring->sgl_size) + return 0; + + if ((num_frags == tx_ring->sgl_size) && + (header_len < tx_ring->tx_max_header_size)) + return 0; + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize++; + u64_stats_update_end(&tx_ring->syncp); + + rc = skb_linearize(skb); + if (unlikely(rc)) { + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize_failed++; + u64_stats_update_end(&tx_ring->syncp); + } + + return rc; +} + +/* Called with netif_tx_lock. */ +static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_tx_buffer *tx_info; + struct ena_com_tx_ctx ena_tx_ctx; + struct ena_ring *tx_ring; + struct netdev_queue *txq; + struct ena_com_buf *ena_buf; + void *push_hdr; + u32 len, last_frag; + u16 next_to_use; + u16 req_id; + u16 push_len; + u16 header_len; + dma_addr_t dma; + int qid, rc, nb_hw_desc; + int i = -1; + + netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); + /* Determine which tx ring we will be placed on */ + qid = skb_get_queue_mapping(skb); + tx_ring = &adapter->tx_ring[qid]; + txq = netdev_get_tx_queue(dev, qid); + + rc = ena_check_and_linearize_skb(tx_ring, skb); + if (unlikely(rc)) + goto error_drop_packet; + + skb_tx_timestamp(skb); + len = skb_headlen(skb); + + next_to_use = tx_ring->next_to_use; + req_id = tx_ring->free_tx_ids[next_to_use]; + tx_info = &tx_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + + WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); + ena_buf = tx_info->bufs; + tx_info->skb = skb; + + if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* prepared the push buffer */ + push_len = min_t(u32, len, tx_ring->tx_max_header_size); + header_len = push_len; + push_hdr = skb->data; + } else { + push_len = 0; + header_len = min_t(u32, len, tx_ring->tx_max_header_size); + push_hdr = NULL; + } + + netif_dbg(adapter, tx_queued, dev, + "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, + push_hdr, push_len); + + if (len > push_len) { + dma = dma_map_single(tx_ring->dev, skb->data + push_len, + len - push_len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len - push_len; + + ena_buf++; + tx_info->num_of_bufs++; + } + + last_frag = skb_shinfo(skb)->nr_frags; + + for (i = 0; i < last_frag; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + len = skb_frag_size(frag); + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len; + ena_buf++; + } + + tx_info->num_of_bufs += last_frag; + + memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = header_len; + + /* set flags and meta data */ + ena_tx_csum(&ena_tx_ctx, skb); + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, + &nb_hw_desc); + + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + tx_ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&tx_ring->syncp); + netif_tx_stop_queue(txq); + goto error_unmap_dma; + } + + netdev_tx_sent_queue(txq, skb->len); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.cnt++; + tx_ring->tx_stats.bytes += skb->len; + u64_stats_update_end(&tx_ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + + tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + tx_ring->ring_size); + + /* This WMB is aimed to: + * 1 - perform smp barrier before reading next_to_completion + * 2 - make sure the desc were written before trigger DB + */ + wmb(); + + /* stop the queue when no more space available, the packet can have up + * to sgl_size + 2. one for the meta descriptor and one for header + * (if the header is larger than tx_max_header_size). + */ + if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) < + (tx_ring->sgl_size + 2))) { + netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", + __func__, qid); + + netif_tx_stop_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + u64_stats_update_end(&tx_ring->syncp); + + /* There is a rare condition where this function decide to + * stop the queue but meanwhile clean_tx_irq updates + * next_to_completion and terminates. + * The queue will remain stopped forever. + * To solve this issue this function perform rmb, check + * the wakeup condition and wake up the queue if needed. + */ + smp_rmb(); + + if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) + > ENA_TX_WAKEUP_THRESH) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + } + + if (netif_xmit_stopped(txq) || !skb->xmit_more) { + /* trigger the dma engine */ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.doorbells++; + u64_stats_update_end(&tx_ring->syncp); + } + + return NETDEV_TX_OK; + +error_report_dma_error: + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&tx_ring->syncp); + netdev_warn(adapter->netdev, "failed to map skb\n"); + + tx_info->skb = NULL; + +error_unmap_dma: + if (i >= 0) { + /* save value of frag that failed */ + last_frag = i; + + /* start back at beginning and unmap skb */ + tx_info->skb = NULL; + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + for (i = 0; i < last_frag; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + } + } + +error_drop_packet: + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ena_netpoll(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + u16 qid; + /* we suspect that this is good for in--kernel network services that + * want to loop incoming skb rx to tx in normal user generated traffic, + * most probably we will not get to this + */ + if (skb_rx_queue_recorded(skb)) + qid = skb_get_rx_queue(skb); + else + qid = fallback(dev, skb); + + return qid; +} + +static void ena_config_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_admin_host_info *host_info; + int rc; + + /* Allocate only the host info */ + rc = ena_com_allocate_host_info(ena_dev); + if (rc) { + pr_err("Cannot allocate host info\n"); + return; + } + + host_info = ena_dev->host_attr.host_info; + + host_info->os_type = ENA_ADMIN_OS_LINUX; + host_info->kernel_ver = LINUX_VERSION_CODE; + strncpy(host_info->kernel_ver_str, utsname()->version, + sizeof(host_info->kernel_ver_str) - 1); + host_info->os_dist = 0; + strncpy(host_info->os_dist_str, utsname()->release, + sizeof(host_info->os_dist_str) - 1); + host_info->driver_version = + (DRV_MODULE_VER_MAJOR) | + (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | + (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); + + rc = ena_com_set_host_attributes(ena_dev); + if (rc) { + if (rc == -EPERM) + pr_warn("Cannot set host attributes\n"); + else + pr_err("Cannot set host attributes\n"); + + goto err; + } + + return; + +err: + ena_com_delete_host_info(ena_dev); +} + +static void ena_config_debug_area(struct ena_adapter *adapter) +{ + u32 debug_area_size; + int rc, ss_count; + + ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS); + if (ss_count <= 0) { + netif_err(adapter, drv, adapter->netdev, + "SS count is negative\n"); + return; + } + + /* allocate 32 bytes for each string and 64bit for the value */ + debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; + + rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size); + if (rc) { + pr_err("Cannot allocate debug area\n"); + return; + } + + rc = ena_com_set_host_attributes(adapter->ena_dev); + if (rc) { + if (rc == -EPERM) + netif_warn(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + else + netif_err(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + goto err; + } + + return; +err: + ena_com_delete_debug_area(adapter->ena_dev); +} + +static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_admin_basic_stats ena_stats; + int rc; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return NULL; + + rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats); + if (rc) + return NULL; + + stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) | + ena_stats.tx_bytes_low; + stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) | + ena_stats.rx_bytes_low; + + stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) | + ena_stats.rx_pkts_low; + stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) | + ena_stats.tx_pkts_low; + + stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) | + ena_stats.rx_drops_low; + + stats->multicast = 0; + stats->collisions = 0; + + stats->rx_length_errors = 0; + stats->rx_crc_errors = 0; + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = 0; + stats->rx_missed_errors = 0; + stats->tx_window_errors = 0; + + stats->rx_errors = 0; + stats->tx_errors = 0; + + return stats; +} + +static const struct net_device_ops ena_netdev_ops = { + .ndo_open = ena_open, + .ndo_stop = ena_close, + .ndo_start_xmit = ena_start_xmit, + .ndo_select_queue = ena_select_queue, + .ndo_get_stats64 = ena_get_stats64, + .ndo_tx_timeout = ena_tx_timeout, + .ndo_change_mtu = ena_change_mtu, + .ndo_set_mac_address = NULL, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ena_netpoll, +#endif /* CONFIG_NET_POLL_CONTROLLER */ +}; + +static void ena_device_io_suspend(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, suspend_io_task); + struct net_device *netdev = adapter->netdev; + + /* ena_napi_disable_all disables only the IO handling. + * We are still subject to AENQ keep alive watchdog. + */ + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_suspend++; + u64_stats_update_begin(&adapter->syncp); + ena_napi_disable_all(adapter); + netif_tx_lock(netdev); + netif_device_detach(netdev); + netif_tx_unlock(netdev); +} + +static void ena_device_io_resume(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, resume_io_task); + struct net_device *netdev = adapter->netdev; + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_resume++; + u64_stats_update_end(&adapter->syncp); + + netif_device_attach(netdev); + ena_napi_enable_all(adapter); +} + +static int ena_device_validate_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr, + adapter->mac_addr); + if (!rc) { + netif_err(adapter, drv, netdev, + "Error, mac address are different\n"); + return -EINVAL; + } + + if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) || + (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) { + netif_err(adapter, drv, netdev, + "Error, device doesn't support enough queues\n"); + return -EINVAL; + } + + if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { + netif_err(adapter, drv, netdev, + "Error, device max mtu is smaller than netdev MTU\n"); + return -EINVAL; + } + + return 0; +} + +static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, + struct ena_com_dev_get_features_ctx *get_feat_ctx, + bool *wd_state) +{ + struct device *dev = &pdev->dev; + bool readless_supported; + u32 aenq_groups; + int dma_width; + int rc; + + rc = ena_com_mmio_reg_read_request_init(ena_dev); + if (rc) { + dev_err(dev, "failed to init mmio read less\n"); + return rc; + } + + /* The PCIe configuration space revision id indicate if mmio reg + * read is disabled + */ + readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ); + ena_com_set_mmio_read_mode(ena_dev, readless_supported); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(dev, "Can not reset device\n"); + goto err_mmio_read_less; + } + + rc = ena_com_validate_version(ena_dev); + if (rc) { + dev_err(dev, "device version is too low\n"); + goto err_mmio_read_less; + } + + dma_width = ena_com_get_dma_width(ena_dev); + if (dma_width < 0) { + dev_err(dev, "Invalid dma width value %d", dma_width); + goto err_mmio_read_less; + } + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); + goto err_mmio_read_less; + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", + rc); + goto err_mmio_read_less; + } + + /* ENA admin level init */ + rc = ena_com_admin_init(ena_dev, &aenq_handlers, true); + if (rc) { + dev_err(dev, + "Can not initialize ena admin queue with device\n"); + goto err_mmio_read_less; + } + + /* To enable the msix interrupts the driver needs to know the number + * of queues. So the driver uses polling mode to retrieve this + * information + */ + ena_com_set_admin_polling_mode(ena_dev, true); + + /* Get Device Attributes*/ + rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); + if (rc) { + dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc); + goto err_admin_init; + } + + /* Try to turn all the available aenq groups */ + aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | + BIT(ENA_ADMIN_FATAL_ERROR) | + BIT(ENA_ADMIN_WARNING) | + BIT(ENA_ADMIN_NOTIFICATION) | + BIT(ENA_ADMIN_KEEP_ALIVE); + + aenq_groups &= get_feat_ctx->aenq.supported_groups; + + rc = ena_com_set_aenq_config(ena_dev, aenq_groups); + if (rc) { + dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc); + goto err_admin_init; + } + + *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); + + ena_config_host_info(ena_dev); + + return 0; + +err_admin_init: + ena_com_admin_destroy(ena_dev); +err_mmio_read_less: + ena_com_mmio_reg_read_request_destroy(ena_dev); + + return rc; +} + +static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, + int io_vectors) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc; + + rc = ena_enable_msix(adapter, io_vectors); + if (rc) { + dev_err(dev, "Can not reserve msix vectors\n"); + return rc; + } + + ena_setup_mgmnt_intr(adapter); + + rc = ena_request_mgmnt_irq(adapter); + if (rc) { + dev_err(dev, "Can not setup management interrupts\n"); + goto err_disable_msix; + } + + ena_com_set_admin_polling_mode(ena_dev, false); + + ena_com_admin_aenq_enable(ena_dev); + + return 0; + +err_disable_msix: + ena_disable_msix(adapter); + + return rc; +} + +static void ena_fw_reset_device(struct work_struct *work) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, reset_task); + struct net_device *netdev = adapter->netdev; + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct pci_dev *pdev = adapter->pdev; + bool dev_up, wd_state; + int rc; + + del_timer_sync(&adapter->timer_service); + + rtnl_lock(); + + dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_com_set_admin_running_state(ena_dev, false); + + /* After calling ena_close the tx queues and the napi + * are disabled so no one can interfere or touch the + * data structures + */ + ena_close(netdev); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(&pdev->dev, "Device reset failed\n"); + goto err; + } + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + /* Finish with the destroy part. Start the init part */ + + rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "Can not initialize device\n"); + goto err; + } + adapter->wd_state = wd_state; + + rc = ena_device_validate_params(adapter, &get_feat_ctx); + if (rc) { + dev_err(&pdev->dev, "Validation of device parameters failed\n"); + goto err_device_destroy; + } + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, + adapter->num_queues); + if (rc) { + dev_err(&pdev->dev, "Enable MSI-X failed\n"); + goto err_device_destroy; + } + /* If the interface was up before the reset bring it up */ + if (dev_up) { + rc = ena_up(adapter); + if (rc) { + dev_err(&pdev->dev, "Failed to create I/O queues\n"); + goto err_disable_msix; + } + } + + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + + rtnl_unlock(); + + dev_err(&pdev->dev, "Device reset completed successfully\n"); + + return; +err_disable_msix: + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_device_destroy: + ena_com_admin_destroy(ena_dev); +err: + rtnl_unlock(); + + dev_err(&pdev->dev, + "Reset attempt failed. Can not reset the device\n"); +} + +static void check_for_missing_tx_completions(struct ena_adapter *adapter) +{ + struct ena_tx_buffer *tx_buf; + unsigned long last_jiffies; + struct ena_ring *tx_ring; + int i, j, budget; + u32 missed_tx; + + /* Make sure the driver doesn't turn the device in other process */ + smp_rmb(); + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + budget = ENA_MONITORED_TX_QUEUES; + + for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + + for (j = 0; j < tx_ring->ring_size; j++) { + tx_buf = &tx_ring->tx_buffer_info[j]; + last_jiffies = tx_buf->last_jiffies; + if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { + netif_notice(adapter, tx_err, adapter->netdev, + "Found a Tx that wasn't completed on time, qid %d, index %d.\n", + tx_ring->qid, j); + + u64_stats_update_begin(&tx_ring->syncp); + missed_tx = tx_ring->tx_stats.missing_tx_comp++; + u64_stats_update_end(&tx_ring->syncp); + + /* Clear last jiffies so the lost buffer won't + * be counted twice. + */ + tx_buf->last_jiffies = 0; + + if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n", + missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + } + } + + budget--; + if (!budget) + break; + } + + adapter->last_monitored_tx_qid = i % adapter->num_queues; +} + +/* Check for keep alive expiration */ +static void check_for_missing_keep_alive(struct ena_adapter *adapter) +{ + unsigned long keep_alive_expired; + + if (!adapter->wd_state) + return; + + keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + + ENA_DEVICE_KALIVE_TIMEOUT); + if (unlikely(time_is_before_jiffies(keep_alive_expired))) { + netif_err(adapter, drv, adapter->netdev, + "Keep alive watchdog timeout.\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.wd_expired++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void check_for_admin_com_state(struct ena_adapter *adapter) +{ + if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) { + netif_err(adapter, drv, adapter->netdev, + "ENA admin queue is not in running state!\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.admin_q_pause++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void ena_update_host_info(struct ena_admin_host_info *host_info, + struct net_device *netdev) +{ + host_info->supported_network_features[0] = + netdev->features & GENMASK_ULL(31, 0); + host_info->supported_network_features[1] = + (netdev->features & GENMASK_ULL(63, 32)) >> 32; +} + +static void ena_timer_service(unsigned long data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr; + struct ena_admin_host_info *host_info = + adapter->ena_dev->host_attr.host_info; + + check_for_missing_keep_alive(adapter); + + check_for_admin_com_state(adapter); + + check_for_missing_tx_completions(adapter); + + if (debug_area) + ena_dump_stats_to_buf(adapter, debug_area); + + if (host_info) + ena_update_host_info(host_info, adapter->netdev); + + if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, drv, adapter->netdev, + "Trigger reset is on\n"); + ena_dump_stats_to_dmesg(adapter); + queue_work(ena_wq, &adapter->reset_task); + return; + } + + /* Reset the timer */ + mod_timer(&adapter->timer_service, jiffies + HZ); +} + +static int ena_calc_io_queue_num(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + int io_sq_num, io_queue_num; + + /* In case of LLQ use the llq number in the get feature cmd */ + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq_num = get_feat_ctx->max_queues.max_llq_num; + + if (io_sq_num == 0) { + dev_err(&pdev->dev, + "Trying to use LLQ but llq_num is 0. Fall back into regular queues\n"); + + ena_dev->tx_mem_queue_type = + ENA_ADMIN_PLACEMENT_POLICY_HOST; + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + } else { + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + + io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES); + io_queue_num = min_t(int, io_queue_num, io_sq_num); + io_queue_num = min_t(int, io_queue_num, + get_feat_ctx->max_queues.max_cq_num); + /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ + io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); + if (unlikely(!io_queue_num)) { + dev_err(&pdev->dev, "The device doesn't have io queues\n"); + return -EFAULT; + } + + return io_queue_num; +} + +static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + bool has_mem_bar; + + has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); + + /* Enable push mode if device supports LLQ */ + if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0)) + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; + else + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + + return 0; +} + +static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, + struct net_device *netdev) +{ + netdev_features_t dev_features = 0; + + /* Set offload features */ + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) + dev_features |= NETIF_F_IP_CSUM; + + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) + dev_features |= NETIF_F_IPV6_CSUM; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) + dev_features |= NETIF_F_TSO; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) + dev_features |= NETIF_F_TSO6; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK) + dev_features |= NETIF_F_TSO_ECN; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + netdev->features = + dev_features | + NETIF_F_SG | + NETIF_F_NTUPLE | + NETIF_F_RXHASH | + NETIF_F_HIGHDMA; + + netdev->hw_features |= netdev->features; + netdev->vlan_features |= netdev->features; +} + +static void ena_set_conf_feat_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *feat) +{ + struct net_device *netdev = adapter->netdev; + + /* Copy mac address */ + if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) { + eth_hw_addr_random(netdev); + ether_addr_copy(adapter->mac_addr, netdev->dev_addr); + } else { + ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr); + ether_addr_copy(netdev->dev_addr, adapter->mac_addr); + } + + /* Set offload features */ + ena_set_dev_offloads(feat, netdev); + + adapter->max_mtu = feat->dev_attr.max_mtu; +} + +static int ena_rss_init_default(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc, i; + u32 val; + + rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); + if (unlikely(rc)) { + dev_err(dev, "Cannot init indirect table\n"); + goto err_rss_init; + } + + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + val = ethtool_rxfh_indir_default(i, adapter->num_queues); + rc = ena_com_indirect_table_fill_entry(ena_dev, i, + ENA_IO_RXQ_IDX(val)); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill indirect table\n"); + goto err_fill_indir; + } + } + + rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL, + ENA_HASH_KEY_SIZE, 0xFFFFFFFF); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash function\n"); + goto err_fill_indir; + } + + rc = ena_com_set_default_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash control\n"); + goto err_fill_indir; + } + + return 0; + +err_fill_indir: + ena_com_rss_destroy(ena_dev); +err_rss_init: + + return rc; +} + +static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) +{ + int release_bars; + + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + pci_release_selected_regions(pdev, release_bars); +} + +static int ena_calc_queue_size(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + u16 *max_tx_sgl_size, + u16 *max_rx_sgl_size, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + u32 queue_size = ENA_DEFAULT_RING_SIZE; + + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_cq_depth); + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_sq_depth); + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_llq_depth); + + queue_size = rounddown_pow_of_two(queue_size); + + if (unlikely(!queue_size)) { + dev_err(&pdev->dev, "Invalid queue size\n"); + return -EFAULT; + } + + *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_tx_descs); + *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_rx_descs); + + return queue_size; +} + +/* ena_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in ena_pci_tbl + * + * Returns 0 on success, negative on failure + * + * ena_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + */ +static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + static int version_printed; + struct net_device *netdev; + struct ena_adapter *adapter; + struct ena_com_dev *ena_dev = NULL; + static int adapters_found; + int io_queue_num, bars, rc; + int queue_size; + u16 tx_sgl_size = 0; + u16 rx_sgl_size = 0; + bool wd_state; + + dev_dbg(&pdev->dev, "%s\n", __func__); + + if (version_printed++ == 0) + dev_info(&pdev->dev, "%s", version); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n"); + return rc; + } + + pci_set_master(pdev); + + ena_dev = vzalloc(sizeof(*ena_dev)); + if (!ena_dev) { + rc = -ENOMEM; + goto err_disable_device; + } + + bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (rc) { + dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", + rc); + goto err_free_ena_dev; + } + + ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR), + pci_resource_len(pdev, ENA_REG_BAR)); + if (!ena_dev->reg_bar) { + dev_err(&pdev->dev, "failed to remap regs bar\n"); + rc = -EFAULT; + goto err_free_region; + } + + ena_dev->dmadev = &pdev->dev; + + rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "ena device init failed\n"); + if (rc == -ETIME) + rc = -EPROBE_DEFER; + goto err_free_region; + } + + rc = ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); + if (rc) { + dev_err(&pdev->dev, "Invalid module param(push_mode)\n"); + goto err_device_destroy; + } + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); + if (!ena_dev->mem_bar) { + rc = -EFAULT; + goto err_device_destroy; + } + } + + /* initial Tx interrupt delay, Assumes 1 usec granularity. + * Updated during device initialization with the real granularity + */ + ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; + io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); + queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size, + &rx_sgl_size, &get_feat_ctx); + if ((queue_size <= 0) || (io_queue_num <= 0)) { + rc = -EFAULT; + goto err_device_destroy; + } + + dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n", + io_queue_num, queue_size); + + /* dev zeroed in init_etherdev */ + netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); + if (!netdev) { + dev_err(&pdev->dev, "alloc_etherdev_mq failed\n"); + rc = -ENOMEM; + goto err_device_destroy; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + adapter = netdev_priv(netdev); + pci_set_drvdata(pdev, adapter); + + adapter->ena_dev = ena_dev; + adapter->netdev = netdev; + adapter->pdev = pdev; + + ena_set_conf_feat_params(adapter, &get_feat_ctx); + + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + + adapter->tx_ring_size = queue_size; + adapter->rx_ring_size = queue_size; + + adapter->max_tx_sgl_size = tx_sgl_size; + adapter->max_rx_sgl_size = rx_sgl_size; + + adapter->num_queues = io_queue_num; + adapter->last_monitored_tx_qid = 0; + + adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; + adapter->wd_state = wd_state; + + snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found); + + rc = ena_com_init_interrupt_moderation(adapter->ena_dev); + if (rc) { + dev_err(&pdev->dev, + "Failed to query interrupt moderation feature\n"); + goto err_netdev_destroy; + } + ena_init_io_rings(adapter); + + netdev->netdev_ops = &ena_netdev_ops; + netdev->watchdog_timeo = TX_TIMEOUT; + ena_set_ethtool_ops(netdev); + + netdev->priv_flags |= IFF_UNICAST_FLT; + + u64_stats_init(&adapter->syncp); + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num); + if (rc) { + dev_err(&pdev->dev, + "Failed to enable and set the admin interrupts\n"); + goto err_worker_destroy; + } + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc); + goto err_free_msix; + } + + ena_config_debug_area(adapter); + + memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len); + + netif_carrier_off(netdev); + + rc = register_netdev(netdev); + if (rc) { + dev_err(&pdev->dev, "Cannot register net device\n"); + goto err_rss; + } + + INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend); + INIT_WORK(&adapter->resume_io_task, ena_device_io_resume); + INIT_WORK(&adapter->reset_task, ena_fw_reset_device); + + adapter->last_keep_alive_jiffies = jiffies; + + init_timer(&adapter->timer_service); + adapter->timer_service.expires = round_jiffies(jiffies + HZ); + adapter->timer_service.function = ena_timer_service; + adapter->timer_service.data = (unsigned long)adapter; + + add_timer(&adapter->timer_service); + + dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n", + DEVICE_NAME, (long)pci_resource_start(pdev, 0), + netdev->dev_addr, io_queue_num); + + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + adapters_found++; + + return 0; + +err_rss: + ena_com_delete_debug_area(ena_dev); + ena_com_rss_destroy(ena_dev); +err_free_msix: + ena_com_dev_reset(ena_dev); + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_worker_destroy: + ena_com_destroy_interrupt_moderation(ena_dev); + del_timer(&adapter->timer_service); + cancel_work_sync(&adapter->suspend_io_task); + cancel_work_sync(&adapter->resume_io_task); +err_netdev_destroy: + free_netdev(netdev); +err_device_destroy: + ena_com_delete_host_info(ena_dev); + ena_com_admin_destroy(ena_dev); +err_free_region: + ena_release_bars(ena_dev, pdev); +err_free_ena_dev: + pci_set_drvdata(pdev, NULL); + vfree(ena_dev); +err_disable_device: + pci_disable_device(pdev); + return rc; +} + +/*****************************************************************************/ +static int ena_sriov_configure(struct pci_dev *dev, int numvfs) +{ + int rc; + + if (numvfs > 0) { + rc = pci_enable_sriov(dev, numvfs); + if (rc != 0) { + dev_err(&dev->dev, + "pci_enable_sriov failed to enable: %d vfs with the error: %d\n", + numvfs, rc); + return rc; + } + + return numvfs; + } + + if (numvfs == 0) { + pci_disable_sriov(dev); + return 0; + } + + return -EINVAL; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ena_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void ena_remove(struct pci_dev *pdev) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + struct ena_com_dev *ena_dev; + struct net_device *netdev; + + if (!adapter) + /* This device didn't load properly and it's resources + * already released, nothing to do + */ + return; + + ena_dev = adapter->ena_dev; + netdev = adapter->netdev; + +#ifdef CONFIG_RFS_ACCEL + if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { + free_irq_cpu_rmap(netdev->rx_cpu_rmap); + netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + unregister_netdev(netdev); + del_timer_sync(&adapter->timer_service); + + cancel_work_sync(&adapter->reset_task); + + cancel_work_sync(&adapter->suspend_io_task); + + cancel_work_sync(&adapter->resume_io_task); + + ena_com_dev_reset(ena_dev); + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + free_netdev(netdev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_rss_destroy(ena_dev); + + ena_com_delete_debug_area(ena_dev); + + ena_com_delete_host_info(ena_dev); + + ena_release_bars(ena_dev, pdev); + + pci_set_drvdata(pdev, NULL); + + pci_disable_device(pdev); + + ena_com_destroy_interrupt_moderation(ena_dev); + + vfree(ena_dev); +} + +static struct pci_driver ena_pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = ena_pci_tbl, + .probe = ena_probe, + .remove = ena_remove, + .sriov_configure = ena_sriov_configure, +}; + +static int __init ena_init(void) +{ + pr_info("%s", version); + + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); + if (!ena_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; + } + + return pci_register_driver(&ena_pci_driver); +} + +static void __exit ena_cleanup(void) +{ + pci_unregister_driver(&ena_pci_driver); + + if (ena_wq) { + destroy_workqueue(ena_wq); + ena_wq = NULL; + } +} + +/****************************************************************************** + ******************************** AENQ Handlers ******************************* + *****************************************************************************/ +/* ena_update_on_link_change: + * Notify the network interface about the change in link status + */ +static void ena_update_on_link_change(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + struct ena_admin_aenq_link_change_desc *aenq_desc = + (struct ena_admin_aenq_link_change_desc *)aenq_e; + int status = aenq_desc->flags & + ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; + + if (status) { + netdev_dbg(adapter->netdev, "%s\n", __func__); + set_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_on(adapter->netdev); + } else { + clear_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_off(adapter->netdev); + } +} + +static void ena_keep_alive_wd(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + adapter->last_keep_alive_jiffies = jiffies; +} + +static void ena_notification(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION, + "Invalid group(%x) expected %x\n", + aenq_e->aenq_common_desc.group, + ENA_ADMIN_NOTIFICATION); + + switch (aenq_e->aenq_common_desc.syndrom) { + case ENA_ADMIN_SUSPEND: + /* Suspend just the IO queues. + * We deliberately don't suspend admin so the timer and + * the keep_alive events should remain. + */ + queue_work(ena_wq, &adapter->suspend_io_task); + break; + case ENA_ADMIN_RESUME: + queue_work(ena_wq, &adapter->resume_io_task); + break; + default: + netif_err(adapter, drv, adapter->netdev, + "Invalid aenq notification link state %d\n", + aenq_e->aenq_common_desc.syndrom); + } +} + +/* This handler will called for unknown event group or unimplemented handlers*/ +static void unimplemented_aenq_handler(void *data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + netif_err(adapter, drv, adapter->netdev, + "Unknown event was received or event with unimplemented handler\n"); +} + +static struct ena_aenq_handlers aenq_handlers = { + .handlers = { + [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, + [ENA_ADMIN_NOTIFICATION] = ena_notification, + [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, + }, + .unimplemented_handler = unimplemented_aenq_handler +}; + +module_init(ena_init); +module_exit(ena_cleanup); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h new file mode 100644 index 000000000000..69d7e9ed5bc8 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -0,0 +1,324 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_H +#define ENA_H + +#include +#include +#include +#include +#include +#include + +#include "ena_com.h" +#include "ena_eth_com.h" + +#define DRV_MODULE_VER_MAJOR 1 +#define DRV_MODULE_VER_MINOR 0 +#define DRV_MODULE_VER_SUBMINOR 2 + +#define DRV_MODULE_NAME "ena" +#ifndef DRV_MODULE_VERSION +#define DRV_MODULE_VERSION \ + __stringify(DRV_MODULE_VER_MAJOR) "." \ + __stringify(DRV_MODULE_VER_MINOR) "." \ + __stringify(DRV_MODULE_VER_SUBMINOR) +#endif + +#define DEVICE_NAME "Elastic Network Adapter (ENA)" + +/* 1 for AENQ + ADMIN */ +#define ENA_MAX_MSIX_VEC(io_queues) (1 + (io_queues)) + +#define ENA_REG_BAR 0 +#define ENA_MEM_BAR 2 +#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) + +#define ENA_DEFAULT_RING_SIZE (1024) + +#define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) +#define ENA_DEFAULT_RX_COPYBREAK (128 - NET_IP_ALIGN) + +/* limit the buffer size to 600 bytes to handle MTU changes from very + * small to very large, in which case the number of buffers per packet + * could exceed ENA_PKT_MAX_BUFS + */ +#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600 + +#define ENA_MIN_MTU 128 + +#define ENA_NAME_MAX_LEN 20 +#define ENA_IRQNAME_SIZE 40 + +#define ENA_PKT_MAX_BUFS 19 + +#define ENA_RX_RSS_TABLE_LOG_SIZE 7 +#define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE) + +#define ENA_HASH_KEY_SIZE 40 + +/* The number of tx packet completions that will be handled each NAPI poll + * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER. + */ +#define ENA_TX_POLL_BUDGET_DIVIDER 4 + +/* Refill Rx queue when number of available descriptors is below + * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER + */ +#define ENA_RX_REFILL_THRESH_DIVIDER 8 + +/* Number of queues to check for missing queues per timer service */ +#define ENA_MONITORED_TX_QUEUES 4 +/* Max timeout packets before device reset */ +#define MAX_NUM_OF_TIMEOUTED_PACKETS 32 + +#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) + +#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) +#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \ + (((idx) + (n)) & ((ring_size) - 1)) + +#define ENA_IO_TXQ_IDX(q) (2 * (q)) +#define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) + +#define ENA_MGMNT_IRQ_IDX 0 +#define ENA_IO_IRQ_FIRST_IDX 1 +#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q)) + +/* ENA device should send keep alive msg every 1 sec. + * We wait for 3 sec just to be on the safe side. + */ +#define ENA_DEVICE_KALIVE_TIMEOUT (3 * HZ) + +#define ENA_MMIO_DISABLE_REG_READ BIT(0) + +struct ena_irq { + irq_handler_t handler; + void *data; + int cpu; + u32 vector; + cpumask_t affinity_hint_mask; + char name[ENA_IRQNAME_SIZE]; +}; + +struct ena_napi { + struct napi_struct napi ____cacheline_aligned; + struct ena_ring *tx_ring; + struct ena_ring *rx_ring; + u32 qid; +}; + +struct ena_tx_buffer { + struct sk_buff *skb; + /* num of ena desc for this specific skb + * (includes data desc and metadata desc) + */ + u32 tx_descs; + /* num of buffers used by this skb */ + u32 num_of_bufs; + /* Save the last jiffies to detect missing tx packets */ + unsigned long last_jiffies; + struct ena_com_buf bufs[ENA_PKT_MAX_BUFS]; +} ____cacheline_aligned; + +struct ena_rx_buffer { + struct sk_buff *skb; + struct page *page; + u32 page_offset; + struct ena_com_buf ena_buf; +} ____cacheline_aligned; + +struct ena_stats_tx { + u64 cnt; + u64 bytes; + u64 queue_stop; + u64 prepare_ctx_err; + u64 queue_wakeup; + u64 dma_mapping_err; + u64 linearize; + u64 linearize_failed; + u64 napi_comp; + u64 tx_poll; + u64 doorbells; + u64 missing_tx_comp; + u64 bad_req_id; +}; + +struct ena_stats_rx { + u64 cnt; + u64 bytes; + u64 refil_partial; + u64 bad_csum; + u64 page_alloc_fail; + u64 skb_alloc_fail; + u64 dma_mapping_err; + u64 bad_desc_num; + u64 rx_copybreak_pkt; +}; + +struct ena_ring { + /* Holds the empty requests for TX out of order completions */ + u16 *free_tx_ids; + union { + struct ena_tx_buffer *tx_buffer_info; + struct ena_rx_buffer *rx_buffer_info; + }; + + /* cache ptr to avoid using the adapter */ + struct device *dev; + struct pci_dev *pdev; + struct napi_struct *napi; + struct net_device *netdev; + struct ena_com_dev *ena_dev; + struct ena_adapter *adapter; + struct ena_com_io_cq *ena_com_io_cq; + struct ena_com_io_sq *ena_com_io_sq; + + u16 next_to_use; + u16 next_to_clean; + u16 rx_copybreak; + u16 qid; + u16 mtu; + u16 sgl_size; + + /* The maximum header length the device can handle */ + u8 tx_max_header_size; + + /* cpu for TPH */ + int cpu; + /* number of tx/rx_buffer_info's entries */ + int ring_size; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + + struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS]; + u32 smoothed_interval; + u32 per_napi_packets; + u32 per_napi_bytes; + enum ena_intr_moder_level moder_tbl_idx; + struct u64_stats_sync syncp; + union { + struct ena_stats_tx tx_stats; + struct ena_stats_rx rx_stats; + }; +} ____cacheline_aligned; + +struct ena_stats_dev { + u64 tx_timeout; + u64 io_suspend; + u64 io_resume; + u64 wd_expired; + u64 interface_up; + u64 interface_down; + u64 admin_q_pause; +}; + +enum ena_flags_t { + ENA_FLAG_DEVICE_RUNNING, + ENA_FLAG_DEV_UP, + ENA_FLAG_LINK_UP, + ENA_FLAG_MSIX_ENABLED, + ENA_FLAG_TRIGGER_RESET +}; + +/* adapter specific private data structure */ +struct ena_adapter { + struct ena_com_dev *ena_dev; + /* OS defined structs */ + struct net_device *netdev; + struct pci_dev *pdev; + + /* rx packets that shorter that this len will be copied to the skb + * header + */ + u32 rx_copybreak; + u32 max_mtu; + + int num_queues; + + struct msix_entry *msix_entries; + int msix_vecs; + + u32 tx_usecs, rx_usecs; /* interrupt moderation */ + u32 tx_frames, rx_frames; /* interrupt moderation */ + + u32 tx_ring_size; + u32 rx_ring_size; + + u32 msg_enable; + + u16 max_tx_sgl_size; + u16 max_rx_sgl_size; + + u8 mac_addr[ETH_ALEN]; + + char name[ENA_NAME_MAX_LEN]; + + unsigned long flags; + /* TX */ + struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + /* RX */ + struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES]; + + struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)]; + + /* timer service */ + struct work_struct reset_task; + struct work_struct suspend_io_task; + struct work_struct resume_io_task; + struct timer_list timer_service; + + bool wd_state; + unsigned long last_keep_alive_jiffies; + + struct u64_stats_sync syncp; + struct ena_stats_dev dev_stats; + + /* last queue index that was checked for uncompleted tx packets */ + u32 last_monitored_tx_qid; +}; + +void ena_set_ethtool_ops(struct net_device *netdev); + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); + +int ena_get_sset_count(struct net_device *netdev, int sset); + +#endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h new file mode 100644 index 000000000000..f80d2a47fa94 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h @@ -0,0 +1,67 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_PCI_ID_TBL_H_ +#define ENA_PCI_ID_TBL_H_ + +#ifndef PCI_VENDOR_ID_AMAZON +#define PCI_VENDOR_ID_AMAZON 0x1d0f +#endif + +#ifndef PCI_DEV_ID_ENA_PF +#define PCI_DEV_ID_ENA_PF 0x0ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_PF +#define PCI_DEV_ID_ENA_LLQ_PF 0x1ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_VF +#define PCI_DEV_ID_ENA_VF 0xec20 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_VF +#define PCI_DEV_ID_ENA_LLQ_VF 0xec21 +#endif + +#define ENA_PCI_ID_TABLE_ENTRY(devid) \ + {PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)}, + +static const struct pci_device_id ena_pci_tbl[] = { + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF) + { } +}; + +#endif /* ENA_PCI_ID_TBL_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h new file mode 100644 index 000000000000..26097a2b6030 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h @@ -0,0 +1,133 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_REGS_H_ +#define _ENA_REGS_H_ + +/* ena_registers offsets */ +#define ENA_REGS_VERSION_OFF 0x0 +#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4 +#define ENA_REGS_CAPS_OFF 0x8 +#define ENA_REGS_CAPS_EXT_OFF 0xc +#define ENA_REGS_AQ_BASE_LO_OFF 0x10 +#define ENA_REGS_AQ_BASE_HI_OFF 0x14 +#define ENA_REGS_AQ_CAPS_OFF 0x18 +#define ENA_REGS_ACQ_BASE_LO_OFF 0x20 +#define ENA_REGS_ACQ_BASE_HI_OFF 0x24 +#define ENA_REGS_ACQ_CAPS_OFF 0x28 +#define ENA_REGS_AQ_DB_OFF 0x2c +#define ENA_REGS_ACQ_TAIL_OFF 0x30 +#define ENA_REGS_AENQ_CAPS_OFF 0x34 +#define ENA_REGS_AENQ_BASE_LO_OFF 0x38 +#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c +#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40 +#define ENA_REGS_AENQ_TAIL_OFF 0x44 +#define ENA_REGS_INTR_MASK_OFF 0x4c +#define ENA_REGS_DEV_CTL_OFF 0x54 +#define ENA_REGS_DEV_STS_OFF 0x58 +#define ENA_REGS_MMIO_REG_READ_OFF 0x5c +#define ENA_REGS_MMIO_RESP_LO_OFF 0x60 +#define ENA_REGS_MMIO_RESP_HI_OFF 0x64 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68 + +/* version register */ +#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff +#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 +#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 + +/* controller_version register */ +#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 + +/* caps register */ +#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 + +/* aq_caps register */ +#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 + +/* acq_caps register */ +#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000 + +/* aenq_caps register */ +#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000 + +/* dev_ctl register */ +#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 +#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2 +#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4 +#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3 +#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8 + +/* dev_sts register */ +#define ENA_REGS_DEV_STS_READY_MASK 0x1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 +#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 +#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 +#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 +#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80 + +/* mmio_reg_read register */ +#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff +#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 +#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 + +/* rss_ind_entry_update register */ +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000 + +#endif /*_ENA_REGS_H_ */ -- cgit v1.2.3 From c50fc2622cbce4341daf40a4e7ad3f1625c9e55c Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Fri, 12 Aug 2016 22:05:46 -0700 Subject: Documentation: dtb: xgene: Add rxlos GPIO mapping Signed-off-by: Quan Nguyen Signed-off-by: Iyappan Subramanian Tested-by: Fushen Chen Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/apm-xgene-enet.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt index e41b2d59ca7f..f591ab782dbc 100644 --- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt +++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt @@ -47,6 +47,9 @@ Optional properties: Valid values are between 0 to 7, that maps to 273, 589, 899, 1222, 1480, 1806, 2147, 2464 ps Default value is 2, which corresponds to 899 ps +- rxlos-gpios: Input gpio from SFP+ module to indicate availability of + incoming signal. + Example: menetclk: menetclk { -- cgit v1.2.3 From adcce4d5dd46d9356c1c9a6515efc430e331fa69 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 15 Aug 2016 14:51:03 -0700 Subject: strparser: Documentation Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/strparser.txt | 136 +++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 Documentation/networking/strparser.txt (limited to 'Documentation') diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt new file mode 100644 index 000000000000..a0bf573dfa61 --- /dev/null +++ b/Documentation/networking/strparser.txt @@ -0,0 +1,136 @@ +Stream Parser +------------- + +The stream parser (strparser) is a utility that parses messages of an +application layer protocol running over a TCP connection. The stream +parser works in conjunction with an upper layer in the kernel to provide +kernel support for application layer messages. For instance, Kernel +Connection Multiplexor (KCM) uses the Stream Parser to parse messages +using a BPF program. + +Interface +--------- + +The API includes a context structure, a set of callbacks, utility +functions, and a data_ready function. The callbacks include +a parse_msg function that is called to perform parsing (e.g. +BPF parsing in case of KCM), and a rcv_msg function that is called +when a full message has been completed. + +A stream parser can be instantiated for a TCP connection. This is done +by: + +strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb) + +strp is a struct of type strparser that is allocated by the upper layer. +csk is the TCP socket associated with the stream parser. Callbacks are +called by the stream parser. + +Callbacks +--------- + +There are four callbacks: + +int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); + + parse_msg is called to determine the length of the next message + in the stream. The upper layer must implement this function. It + should parse the sk_buff as containing the headers for the + next application layer messages in the stream. + + The skb->cb in the input skb is a struct strp_rx_msg. Only + the offset field is relevant in parse_msg and gives the offset + where the message starts in the skb. + + The return values of this function are: + + >0 : indicates length of successfully parsed message + 0 : indicates more data must be received to parse the message + -ESTRPIPE : current message should not be processed by the + kernel, return control of the socket to userspace which + can proceed to read the messages itself + other < 0 : Error is parsing, give control back to userspace + assuming that synchronization is lost and the stream + is unrecoverable (application expected to close TCP socket) + + In the case that an error is returned (return value is less than + zero) the stream parser will set the error on TCP socket and wake + it up. If parse_msg returned -ESTRPIPE and the stream parser had + previously read some bytes for the current message, then the error + set on the attached socket is ENODATA since the stream is + unrecoverable in that case. + +void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + + rcv_msg is called when a full message has been received and + is queued. The callee must consume the sk_buff; it can + call strp_pause to prevent any further messages from being + received in rcv_msg (see strp_pause below). This callback + must be set. + + The skb->cb in the input skb is a struct strp_rx_msg. This + struct contains two fields: offset and full_len. Offset is + where the message starts in the skb, and full_len is the + the length of the message. skb->len - offset may be greater + then full_len since strparser does not trim the skb. + +int (*read_sock_done)(struct strparser *strp, int err); + + read_sock_done is called when the stream parser is done reading + the TCP socket. The stream parser may read multiple messages + in a loop and this function allows cleanup to occur when existing + the loop. If the callback is not set (NULL in strp_init) a + default function is used. + +void (*abort_parser)(struct strparser *strp, int err); + + This function is called when stream parser encounters an error + in parsing. The default function stops the stream parser for the + TCP socket and sets the error in the socket. The default function + can be changed by setting the callback to non-NULL in strp_init. + +Functions +--------- + +The upper layer calls strp_tcp_data_ready when data is ready on the lower +socket for strparser to process. This should be called from a data_ready +callback that is set on the socket. + +strp_stop is called to completely stop stream parser operations. This +is called internally when the stream parser encounters an error, and +it is called from the upper layer when unattaching a TCP socket. + +strp_done is called to unattach the stream parser from the TCP socket. +This must be called after the stream processor has be stopped. + +strp_check_rcv is called to check for new messages on the socket. This +is normally called at initialization of the a stream parser instance +of after strp_unpause. + +Statistics +---------- + +Various counters are kept for each stream parser for a TCP socket. +These are in the strp_stats structure. strp_aggr_stats is a convenience +structure for accumulating statistics for multiple stream parser +instances. save_strp_stats and aggregate_strp_stats are helper functions +to save and aggregate statistics. + +Message assembly limits +----------------------- + +The stream parser provide mechanisms to limit the resources consumed by +message assembly. + +A timer is set when assembly starts for a new message. The message +timeout is taken from rcvtime for the associated TCP socket. If the +timer fires before assembly completes the stream parser is aborted +and the ETIMEDOUT error is set on the TCP socket. + +Message length is limited to the receive buffer size of the associated +TCP socket. If the length returned by parse_msg is greater than +the socket buffer size then the stream parser is aborted with +EMSGSIZE error set on the TCP socket. Note that this makes the +maximum size of receive skbuffs for a socket with a stream parser +to be 2*sk_rcvbuf of the TCP socket. -- cgit v1.2.3 From aead88bd0e990540c03df8108671c93b35354736 Mon Sep 17 00:00:00 2001 From: "shubhrajyoti.datta@xilinx.com" Date: Tue, 16 Aug 2016 10:14:50 +0530 Subject: net: ethernet: macb: Add support for rx_clk Some of the platforms like zynqmp ultrascale+ has a separate clock gate for the rx clock. Add an optional rx_clk so that the clock can be enabled. Signed-off-by: Shubhrajyoti Datta Acked-by: Nicolas Ferre Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/macb.txt | 1 + drivers/net/ethernet/cadence/macb.c | 31 +++++++++++++++++++++----- drivers/net/ethernet/cadence/macb.h | 4 +++- 3 files changed, 30 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index b5a42df4c928..1506e948610c 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -21,6 +21,7 @@ Required properties: - clock-names: Tuple listing input clock names. Required elements: 'pclk', 'hclk' Optional elements: 'tx_clk' + Optional elements: 'rx_clk' applies to cdns,zynqmp-gem - clocks: Phandles to input clocks. Optional properties for PHY child node: diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index dbce938e7d0c..32568392b9f9 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2332,7 +2332,8 @@ static void macb_probe_queues(void __iomem *mem, } static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk) + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk) { int err; @@ -2354,6 +2355,10 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (IS_ERR(*tx_clk)) *tx_clk = NULL; + *rx_clk = devm_clk_get(&pdev->dev, "rx_clk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + err = clk_prepare_enable(*pclk); if (err) { dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); @@ -2372,8 +2377,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, goto err_disable_hclk; } + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + goto err_disable_txclk; + } + return 0; +err_disable_txclk: + clk_disable_unprepare(*tx_clk); + err_disable_hclk: clk_disable_unprepare(*hclk); @@ -2763,12 +2777,14 @@ static const struct net_device_ops at91ether_netdev_ops = { }; static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk) + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk) { int err; *hclk = NULL; *tx_clk = NULL; + *rx_clk = NULL; *pclk = devm_clk_get(&pdev->dev, "ether_clk"); if (IS_ERR(*pclk)) @@ -2892,13 +2908,13 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids); static int macb_probe(struct platform_device *pdev) { int (*clk_init)(struct platform_device *, struct clk **, - struct clk **, struct clk **) + struct clk **, struct clk **, struct clk **) = macb_clk_init; int (*init)(struct platform_device *) = macb_init; struct device_node *np = pdev->dev.of_node; struct device_node *phy_node; const struct macb_config *macb_config = NULL; - struct clk *pclk, *hclk = NULL, *tx_clk = NULL; + struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL; unsigned int queue_mask, num_queues; struct macb_platform_data *pdata; bool native_io; @@ -2926,7 +2942,7 @@ static int macb_probe(struct platform_device *pdev) } } - err = clk_init(pdev, &pclk, &hclk, &tx_clk); + err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk); if (err) return err; @@ -2962,6 +2978,7 @@ static int macb_probe(struct platform_device *pdev) bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; + bp->rx_clk = rx_clk; if (macb_config) bp->jumbo_max_len = macb_config->jumbo_max_len; @@ -3060,6 +3077,7 @@ err_disable_clocks: clk_disable_unprepare(tx_clk); clk_disable_unprepare(hclk); clk_disable_unprepare(pclk); + clk_disable_unprepare(rx_clk); return err; } @@ -3086,6 +3104,7 @@ static int macb_remove(struct platform_device *pdev) clk_disable_unprepare(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); + clk_disable_unprepare(bp->rx_clk); free_netdev(dev); } @@ -3109,6 +3128,7 @@ static int __maybe_unused macb_suspend(struct device *dev) clk_disable_unprepare(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); + clk_disable_unprepare(bp->rx_clk); } return 0; @@ -3128,6 +3148,7 @@ static int __maybe_unused macb_resume(struct device *dev) clk_prepare_enable(bp->pclk); clk_prepare_enable(bp->hclk); clk_prepare_enable(bp->tx_clk); + clk_prepare_enable(bp->rx_clk); } netif_device_attach(netdev); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index aa3aeecc3132..8bed4b52fef5 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -772,7 +772,8 @@ struct macb_config { u32 caps; unsigned int dma_burst_length; int (*clk_init)(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk); + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk); int (*init)(struct platform_device *pdev); int jumbo_max_len; }; @@ -819,6 +820,7 @@ struct macb { struct clk *pclk; struct clk *hclk; struct clk *tx_clk; + struct clk *rx_clk; struct net_device *dev; struct napi_struct napi; struct net_device_stats stats; -- cgit v1.2.3 From a85fad1436b0fd0fe9d7f1d23387e83e7cd21c0a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Aug 2016 15:30:13 -0700 Subject: Documentation: dt: bindings: Update Broadcom 7445 switch document Document the updated binding which conforms to the new DSA binding in net/dsa/dsa.txt. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../bindings/net/brcm,bcm7445-switch-v4.0.txt | 45 +++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt index 30d487597ecb..fb40891ee606 100644 --- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt +++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt @@ -6,9 +6,13 @@ Required properties: - reg: addresses and length of the register sets for the device, must be 6 pairs of register addresses and lengths - interrupts: interrupts for the devices, must be two interrupts +- #address-cells: must be 1, see dsa/dsa.txt +- #size-cells: must be 0, see dsa/dsa.txt + +Deprecated binding required properties: + - dsa,mii-bus: phandle to the MDIO bus controller, see dsa/dsa.txt - dsa,ethernet: phandle to the CPU network interface controller, see dsa/dsa.txt -- #size-cells: must be 0 - #address-cells: must be 2, see dsa/dsa.txt Subnodes: @@ -39,6 +43,45 @@ Optional properties: Example: +switch_top@f0b00000 { + compatible = "simple-bus"; + #size-cells = <1>; + #address-cells = <1>; + ranges = <0 0xf0b00000 0x40804>; + + ethernet_switch@0 { + compatible = "brcm,bcm7445-switch-v4.0"; + #size-cells = <0>; + #address-cells = <1>; + reg = <0x0 0x40000 + 0x40000 0x110 + 0x40340 0x30 + 0x40380 0x30 + 0x40400 0x34 + 0x40600 0x208>; + reg-names = "core", "reg", intrl2_0", "intrl2_1", + "fcb, "acb"; + interrupts = <0 0x18 0 + 0 0x19 0>; + brcm,num-gphy = <1>; + brcm,num-rgmii-ports = <2>; + brcm,fcb-pause-override; + brcm,acb-packets-inflight; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + label = "gphy"; + reg = <0>; + }; + }; + }; +}; + +Example using the old DSA DeviceTree binding: + switch_top@f0b00000 { compatible = "simple-bus"; #size-cells = <1>; -- cgit v1.2.3 From cebc5cbab48f5c58512e26aa1965284354227258 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 22 Aug 2016 17:17:54 -0700 Subject: net-tcp: retire TFO_SERVER_WO_SOCKOPT2 config TFO_SERVER_WO_SOCKOPT2 was intended for debugging purposes during Fast Open development. Remove this config option and also update/clean-up the documentation of the Fast Open sysctl. Reported-by: Piotr Jurkiewicz Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 45 +++++++++++++++++----------------- include/net/tcp.h | 3 +-- net/ipv4/af_inet.c | 21 ++++++---------- 3 files changed, 32 insertions(+), 37 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 9ae929395b24..3db8c67d2c8d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -575,32 +575,33 @@ tcp_syncookies - BOOLEAN unconditionally generation of syncookies. tcp_fastopen - INTEGER - Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data - in the opening SYN packet. To use this feature, the client application - must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than - connect() to perform a TCP handshake automatically. + Enable TCP Fast Open (RFC7413) to send and accept data in the opening + SYN packet. - The values (bitmap) are - 1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN. - 2: Enables TCP Fast Open on the server side, i.e., allowing data in - a SYN packet to be accepted and passed to the application before - 3-way hand shake finishes. - 4: Send data in the opening SYN regardless of cookie availability and - without a cookie option. - 0x100: Accept SYN data w/o validating the cookie. - 0x200: Accept data-in-SYN w/o any cookie option present. - 0x400/0x800: Enable Fast Open on all listeners regardless of the - TCP_FASTOPEN socket option. The two different flags designate two - different ways of setting max_qlen without the TCP_FASTOPEN socket - option. + The client support is enabled by flag 0x1 (on by default). The client + then must use sendmsg() or sendto() with the MSG_FASTOPEN flag, + rather than connect() to send data in SYN. - Default: 1 + The server support is enabled by flag 0x2 (off by default). Then + either enable for all listeners with another flag (0x400) or + enable individual listeners via TCP_FASTOPEN socket option with + the option value being the length of the syn-data backlog. - Note that the client & server side Fast Open flags (1 and 2 - respectively) must be also enabled before the rest of flags can take - effect. + The values (bitmap) are + 0x1: (client) enables sending data in the opening SYN on the client. + 0x2: (server) enables the server support, i.e., allowing data in + a SYN packet to be accepted and passed to the + application before 3-way handshake finishes. + 0x4: (client) send data in the opening SYN regardless of cookie + availability and without a cookie option. + 0x200: (server) accept data-in-SYN w/o any cookie option present. + 0x400: (server) enable all listeners to support Fast Open by + default without explicit TCP_FASTOPEN socket option. + + Default: 0x1 - See include/net/tcp.h and the code for more details. + Note that that additional client or server features are only + effective if the basic support (0x1 and 0x2) are enabled respectively. tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt diff --git a/include/net/tcp.h b/include/net/tcp.h index c00e7d51bb18..25d64f6de69e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -227,10 +227,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TFO_SERVER_COOKIE_NOT_REQD 0x200 /* Force enable TFO on all listeners, i.e., not requiring the - * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen. + * TCP_FASTOPEN socket option. */ #define TFO_SERVER_WO_SOCKOPT1 0x400 -#define TFO_SERVER_WO_SOCKOPT2 0x800 extern struct inet_timewait_death_row tcp_death_row; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55513e654d79..989a362814a9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -211,24 +211,19 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - /* Check special setups for testing purpose to enable TFO w/o - * requiring TCP_FASTOPEN sockopt. + /* Enable TFO w/o requiring TCP_FASTOPEN socket option. * Note that only TCP sockets (SOCK_STREAM) will reach here. - * Also fastopenq may already been allocated because this - * socket was in TCP_LISTEN state previously but was - * shutdown() (rather than close()). + * Also fastopen backlog may already been set via the option + * because the socket was in TCP_LISTEN state previously but + * was shutdown() rather than close(). */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - fastopen_queue_tune(sk, backlog); - else if ((sysctl_tcp_fastopen & - TFO_SERVER_WO_SOCKOPT2) != 0) - fastopen_queue_tune(sk, - ((uint)sysctl_tcp_fastopen) >> 16); - + fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); } + err = inet_csk_listen_start(sk, backlog); if (err) goto out; -- cgit v1.2.3 From 9d490b4ee4d7d495a4f4908ea998d2a7355e0807 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 23 Aug 2016 12:38:56 -0400 Subject: net: dsa: rename switch operations structure Now that the dsa_switch_driver structure contains only function pointers as it is supposed to, rename it to the more appropriate dsa_switch_ops, uniformly to any other operations structure in the kernel. No functional changes here, basically just the result of something like: s/dsa_switch_driver *drv/dsa_switch_ops *ops/g However keep the {un,}register_switch_driver functions and their dsa_switch_drivers list as is, since they represent the -- likely to be deprecated soon -- legacy DSA registration framework. In the meantime, also fix the following checks from checkpatch.pl to make it happy with this patch: CHECK: Comparison to NULL could be written "!ops" #403: FILE: net/dsa/dsa.c:470: + if (ops == NULL) { CHECK: Comparison to NULL could be written "ds->ops->get_strings" #773: FILE: net/dsa/slave.c:697: + if (ds->ops->get_strings != NULL) CHECK: Comparison to NULL could be written "ds->ops->get_ethtool_stats" #824: FILE: net/dsa/slave.c:785: + if (ds->ops->get_ethtool_stats != NULL) CHECK: Comparison to NULL could be written "ds->ops->get_sset_count" #835: FILE: net/dsa/slave.c:798: + if (ds->ops->get_sset_count != NULL) total: 0 errors, 0 warnings, 4 checks, 784 lines checked Signed-off-by: Vivien Didelot Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 10 +-- drivers/net/dsa/b53/b53_common.c | 4 +- drivers/net/dsa/bcm_sf2.c | 4 +- drivers/net/dsa/mv88e6060.c | 6 +- drivers/net/dsa/mv88e6xxx/chip.c | 8 +- include/net/dsa.h | 10 +-- net/dsa/dsa.c | 70 ++++++++--------- net/dsa/dsa2.c | 16 ++-- net/dsa/slave.c | 146 +++++++++++++++++------------------ 9 files changed, 137 insertions(+), 137 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 9d05ed7f7da5..44ed453ccf66 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -227,9 +227,9 @@ to address individual switches in the tree. dsa_switch: structure describing a switch device in the tree, referencing a dsa_switch_tree as a backpointer, slave network devices, master network device, -and a reference to the backing dsa_switch_driver +and a reference to the backing dsa_switch_ops -dsa_switch_driver: structure referencing function pointers, see below for a full +dsa_switch_ops: structure referencing function pointers, see below for a full description. Design limitations @@ -357,10 +357,10 @@ regular HWMON devices in /sys/class/hwmon/. Driver development ================== -DSA switch drivers need to implement a dsa_switch_driver structure which will +DSA switch drivers need to implement a dsa_switch_ops structure which will contain the various members described below. -register_switch_driver() registers this dsa_switch_driver in its internal list +register_switch_driver() registers this dsa_switch_ops in its internal list of drivers to probe for. unregister_switch_driver() does the exact opposite. Unless requested differently by setting the priv_size member accordingly, DSA @@ -379,7 +379,7 @@ Switch configuration buses, return a non-NULL string - setup: setup function for the switch, this function is responsible for setting - up the dsa_switch_driver private structure with all it needs: register maps, + up the dsa_switch_ops private structure with all it needs: register maps, interrupts, mutexes, locks etc.. This function is also expected to properly configure the switch to separate all network interfaces from each other, that is, they should be isolated by the switch hardware itself, typically by creating diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 65ecb51f99e5..6fb77cca78bb 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1378,7 +1378,7 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) return DSA_TAG_PROTO_NONE; } -static struct dsa_switch_driver b53_switch_ops = { +static struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, .set_addr = b53_set_addr, @@ -1618,7 +1618,7 @@ static int b53_switch_init(struct b53_device *dev) dev->vta_regs[1] = chip->vta_regs[1]; dev->vta_regs[2] = chip->vta_regs[2]; dev->jumbo_pm_reg = chip->jumbo_pm_reg; - ds->drv = &b53_switch_ops; + ds->ops = &b53_switch_ops; dev->cpu_port = chip->cpu_port; dev->num_vlans = chip->vlans; dev->num_arl_entries = chip->arl_entries; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b47a74b37a42..220e5d1948ca 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1581,7 +1581,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) return 0; } -static struct dsa_switch_driver bcm_sf2_switch_driver = { +static struct dsa_switch_ops bcm_sf2_switch_ops = { .setup = bcm_sf2_sw_setup, .get_tag_protocol = bcm_sf2_sw_get_tag_protocol, .set_addr = bcm_sf2_sw_set_addr, @@ -1632,7 +1632,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) priv = (struct bcm_sf2_priv *)(ds + 1); ds->priv = priv; ds->dev = &pdev->dev; - ds->drv = &bcm_sf2_switch_driver; + ds->ops = &bcm_sf2_switch_ops; dev_set_drvdata(&pdev->dev, ds); diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 1fdfbf3a50bc..7ff9d373a9ee 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -252,7 +252,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) return reg_write(ds, addr, regnum, val); } -static struct dsa_switch_driver mv88e6060_switch_driver = { +static struct dsa_switch_ops mv88e6060_switch_ops = { .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, @@ -263,14 +263,14 @@ static struct dsa_switch_driver mv88e6060_switch_driver = { static int __init mv88e6060_init(void) { - register_switch_driver(&mv88e6060_switch_driver); + register_switch_driver(&mv88e6060_switch_ops); return 0; } module_init(mv88e6060_init); static void __exit mv88e6060_cleanup(void) { - unregister_switch_driver(&mv88e6060_switch_driver); + unregister_switch_driver(&mv88e6060_switch_ops); } module_exit(mv88e6060_cleanup); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 82d45165803c..750d01d775e0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3976,7 +3976,7 @@ free: return NULL; } -static struct dsa_switch_driver mv88e6xxx_switch_driver = { +static struct dsa_switch_ops mv88e6xxx_switch_ops = { .probe = mv88e6xxx_drv_probe, .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, @@ -4025,7 +4025,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, ds->dev = dev; ds->priv = chip; - ds->drv = &mv88e6xxx_switch_driver; + ds->ops = &mv88e6xxx_switch_ops; dev_set_drvdata(dev, ds); @@ -4118,7 +4118,7 @@ static struct mdio_driver mv88e6xxx_driver = { static int __init mv88e6xxx_init(void) { - register_switch_driver(&mv88e6xxx_switch_driver); + register_switch_driver(&mv88e6xxx_switch_ops); return mdio_driver_register(&mv88e6xxx_driver); } module_init(mv88e6xxx_init); @@ -4126,7 +4126,7 @@ module_init(mv88e6xxx_init); static void __exit mv88e6xxx_cleanup(void) { mdio_driver_unregister(&mv88e6xxx_driver); - unregister_switch_driver(&mv88e6xxx_switch_driver); + unregister_switch_driver(&mv88e6xxx_switch_ops); } module_exit(mv88e6xxx_cleanup); diff --git a/include/net/dsa.h b/include/net/dsa.h index 8ca2684c5358..2ebeba44a461 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -165,9 +165,9 @@ struct dsa_switch { struct dsa_chip_data *cd; /* - * The used switch driver. + * The switch operations. */ - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; /* * An array of which element [a] indicates which port on this @@ -236,7 +236,7 @@ struct switchdev_obj; struct switchdev_obj_port_fdb; struct switchdev_obj_port_vlan; -struct dsa_switch_driver { +struct dsa_switch_ops { struct list_head list; /* @@ -371,8 +371,8 @@ struct dsa_switch_driver { int (*cb)(struct switchdev_obj *obj)); }; -void register_switch_driver(struct dsa_switch_driver *type); -void unregister_switch_driver(struct dsa_switch_driver *type); +void register_switch_driver(struct dsa_switch_ops *type); +void unregister_switch_driver(struct dsa_switch_ops *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); static inline void *ds_to_priv(struct dsa_switch *ds) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 8d3a28d4e99d..d8d267e9a872 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -61,27 +61,27 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { static DEFINE_MUTEX(dsa_switch_drivers_mutex); static LIST_HEAD(dsa_switch_drivers); -void register_switch_driver(struct dsa_switch_driver *drv) +void register_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); + list_add_tail(&ops->list, &dsa_switch_drivers); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(register_switch_driver); -void unregister_switch_driver(struct dsa_switch_driver *drv) +void unregister_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); + list_del_init(&ops->list); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(unregister_switch_driver); -static struct dsa_switch_driver * +static struct dsa_switch_ops * dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, const char **_name, void **priv) { - struct dsa_switch_driver *ret; + struct dsa_switch_ops *ret; struct list_head *list; const char *name; @@ -90,13 +90,13 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, mutex_lock(&dsa_switch_drivers_mutex); list_for_each(list, &dsa_switch_drivers) { - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; - drv = list_entry(list, struct dsa_switch_driver, list); + ops = list_entry(list, struct dsa_switch_ops, list); - name = drv->probe(parent, host_dev, sw_addr, priv); + name = ops->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { - ret = drv; + ret = ops; break; } } @@ -117,7 +117,7 @@ static ssize_t temp1_input_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp(ds, &temp); + ret = ds->ops->get_temp(ds, &temp); if (ret < 0) return ret; @@ -131,7 +131,7 @@ static ssize_t temp1_max_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp_limit(ds, &temp); + ret = ds->ops->get_temp_limit(ds, &temp); if (ret < 0) return ret; @@ -149,7 +149,7 @@ static ssize_t temp1_max_store(struct device *dev, if (ret < 0) return ret; - ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); + ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); if (ret < 0) return ret; @@ -164,7 +164,7 @@ static ssize_t temp1_max_alarm_show(struct device *dev, bool alarm; int ret; - ret = ds->drv->get_temp_alarm(ds, &alarm); + ret = ds->ops->get_temp_alarm(ds, &alarm); if (ret < 0) return ret; @@ -184,15 +184,15 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct dsa_switch *ds = dev_get_drvdata(dev); - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; umode_t mode = attr->mode; if (index == 1) { - if (!drv->get_temp_limit) + if (!ops->get_temp_limit) mode = 0; - else if (!drv->set_temp_limit) + else if (!ops->set_temp_limit) mode &= ~S_IWUSR; - } else if (index == 2 && !drv->get_temp_alarm) { + } else if (index == 2 && !ops->get_temp_alarm) { mode = 0; } return mode; @@ -228,8 +228,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, genphy_config_init(phydev); genphy_read_status(phydev); - if (ds->drv->adjust_link) - ds->drv->adjust_link(ds, port, phydev); + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); } return 0; @@ -303,7 +303,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; @@ -356,7 +356,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (dst->cpu_switch == index) { enum dsa_tag_protocol tag_protocol; - tag_protocol = drv->get_tag_protocol(ds); + tag_protocol = ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { ret = PTR_ERR(dst->tag_ops); @@ -371,15 +371,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) /* * Do basic register setup. */ - ret = drv->setup(ds); + ret = ops->setup(ds); if (ret < 0) goto out; - ret = drv->set_addr(ds, dst->master_netdev->dev_addr); + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); if (ret < 0) goto out; - if (!ds->slave_mii_bus && drv->phy_read) { + if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); if (!ds->slave_mii_bus) { ret = -ENOMEM; @@ -426,7 +426,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * register with hardware monitoring subsystem. * Treat registration error as non-fatal and ignore it. */ - if (drv->get_temp) { + if (ops->get_temp) { const char *netname = netdev_name(dst->master_netdev); char hname[IFNAMSIZ + 1]; int i, j; @@ -457,7 +457,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; struct dsa_switch *ds; int ret; const char *name; @@ -466,8 +466,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Probe for switch model. */ - drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (drv == NULL) { + ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); + if (!ops) { netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", index); return ERR_PTR(-EINVAL); @@ -486,7 +486,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->dst = dst; ds->index = index; ds->cd = cd; - ds->drv = drv; + ds->ops = ops; ds->priv = priv; ds->dev = parent; @@ -541,7 +541,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) ds->dsa_port_mask |= ~(1 << port); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -560,8 +560,8 @@ int dsa_switch_suspend(struct dsa_switch *ds) return ret; } - if (ds->drv->suspend) - ret = ds->drv->suspend(ds); + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); return ret; } @@ -571,8 +571,8 @@ int dsa_switch_resume(struct dsa_switch *ds) { int i, ret = 0; - if (ds->drv->resume) - ret = ds->drv->resume(ds); + if (ds->ops->resume) + ret = ds->ops->resume(ds); if (ret) return ret; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 2e343221464c..8278385dcd21 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -294,25 +294,25 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus - * driver and before drv->setup() has run, since the switch drivers and + * driver and before ops->setup() has run, since the switch drivers and * the slave MDIO bus driver rely on these values for probing PHY * devices or not */ ds->phys_mii_mask = ds->enabled_port_mask; - err = ds->drv->setup(ds); + err = ds->ops->setup(ds); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - if (!ds->slave_mii_bus && ds->drv->phy_read) { + if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) return -ENOMEM; @@ -374,7 +374,7 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) dsa_user_port_unapply(port, index, ds); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -466,7 +466,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, dst->cpu_port = index; } - tag_protocol = ds->drv->get_tag_protocol(ds); + tag_protocol = ds->ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); @@ -543,7 +543,7 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) ds->ports[reg].dn = port; - /* Initialize enabled_port_mask now for drv->setup() + /* Initialize enabled_port_mask now for ops->setup() * to have access to a correct value, just like what * net/dsa/dsa.c::dsa_switch_setup_one does. */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fc9196745225..9f6c2a20f6ff 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -28,7 +28,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_read(ds, addr, reg); + return ds->ops->phy_read(ds, addr, reg); return 0xffff; } @@ -38,7 +38,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_write(ds, addr, reg, val); + return ds->ops->phy_write(ds, addr, reg, val); return 0; } @@ -98,14 +98,14 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->drv->port_enable) { - err = ds->drv->port_enable(ds, p->port, p->phy); + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, p->port, p->phy); if (err) goto clear_promisc; } - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, stp_state); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -144,11 +144,11 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->drv->port_disable) - ds->drv->port_disable(ds, p->port, p->phy); + if (ds->ops->port_disable) + ds->ops->port_disable(ds, p->port, p->phy); - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -209,13 +209,13 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) return -EOPNOTSUPP; - return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); + return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans); } - ds->drv->port_vlan_add(ds, p->port, vlan, trans); + ds->ops->port_vlan_add(ds, p->port, vlan, trans); return 0; } @@ -226,10 +226,10 @@ static int dsa_slave_port_vlan_del(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (!ds->drv->port_vlan_del) + if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; - return ds->drv->port_vlan_del(ds, p->port, vlan); + return ds->ops->port_vlan_del(ds, p->port, vlan); } static int dsa_slave_port_vlan_dump(struct net_device *dev, @@ -239,8 +239,8 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_vlan_dump) - return ds->drv->port_vlan_dump(ds, p->port, vlan, cb); + if (ds->ops->port_vlan_dump) + return ds->ops->port_vlan_dump(ds, p->port, vlan, cb); return -EOPNOTSUPP; } @@ -253,13 +253,13 @@ static int dsa_slave_port_fdb_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans); } - ds->drv->port_fdb_add(ds, p->port, fdb, trans); + ds->ops->port_fdb_add(ds, p->port, fdb, trans); return 0; } @@ -271,8 +271,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev, struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->port_fdb_del) - ret = ds->drv->port_fdb_del(ds, p->port, fdb); + if (ds->ops->port_fdb_del) + ret = ds->ops->port_fdb_del(ds, p->port, fdb); return ret; } @@ -284,8 +284,8 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_fdb_dump) - return ds->drv->port_fdb_dump(ds, p->port, fdb, cb); + if (ds->ops->port_fdb_dump) + return ds->ops->port_fdb_dump(ds, p->port, fdb, cb); return -EOPNOTSUPP; } @@ -308,9 +308,9 @@ static int dsa_slave_stp_state_set(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) - return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP; + return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state); + ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state); return 0; } @@ -326,8 +326,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return 0; - if (ds->drv->port_vlan_filtering) - return ds->drv->port_vlan_filtering(ds, p->port, + if (ds->ops->port_vlan_filtering) + return ds->ops->port_vlan_filtering(ds, p->port, attr->u.vlan_filtering); return 0; @@ -365,8 +365,8 @@ static int dsa_slave_ageing_time(struct net_device *dev, ds->ports[p->port].ageing_time = ageing_time; ageing_time = dsa_fastest_ageing_time(ds, ageing_time); - if (ds->drv->set_ageing_time) - return ds->drv->set_ageing_time(ds, ageing_time); + if (ds->ops->set_ageing_time) + return ds->ops->set_ageing_time(ds, ageing_time); return 0; } @@ -481,8 +481,8 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, p->bridge_dev = br; - if (ds->drv->port_bridge_join) - ret = ds->drv->port_bridge_join(ds, p->port, br); + if (ds->ops->port_bridge_join) + ret = ds->ops->port_bridge_join(ds, p->port, br); return ret == -EOPNOTSUPP ? 0 : ret; } @@ -493,16 +493,16 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) struct dsa_switch *ds = p->parent; - if (ds->drv->port_bridge_leave) - ds->drv->port_bridge_leave(ds, p->port); + if (ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, p->port); p->bridge_dev = NULL; /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -605,8 +605,8 @@ static int dsa_slave_get_regs_len(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs_len) - return ds->drv->get_regs_len(ds, p->port); + if (ds->ops->get_regs_len) + return ds->ops->get_regs_len(ds, p->port); return -EOPNOTSUPP; } @@ -617,8 +617,8 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs) - ds->drv->get_regs(ds, p->port, regs, _p); + if (ds->ops->get_regs) + ds->ops->get_regs(ds, p->port, regs, _p); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -651,8 +651,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; - if (ds->drv->get_eeprom_len) - return ds->drv->get_eeprom_len(ds); + if (ds->ops->get_eeprom_len) + return ds->ops->get_eeprom_len(ds); return 0; } @@ -663,8 +663,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_eeprom) - return ds->drv->get_eeprom(ds, eeprom, data); + if (ds->ops->get_eeprom) + return ds->ops->get_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -675,8 +675,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->set_eeprom) - return ds->drv->set_eeprom(ds, eeprom, data); + if (ds->ops->set_eeprom) + return ds->ops->set_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -694,8 +694,8 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + len, "tx_bytes", len); strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); - if (ds->drv->get_strings != NULL) - ds->drv->get_strings(ds, p->port, data + 4 * len); + if (ds->ops->get_strings) + ds->ops->get_strings(ds, p->port, data + 4 * len); } } @@ -714,8 +714,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); } - if (ds->drv->get_ethtool_stats) - ds->drv->get_ethtool_stats(ds, cpu_port, data + count); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, cpu_port, data + count); } static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) @@ -727,8 +727,8 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) if (dst->master_ethtool_ops.get_sset_count) count += dst->master_ethtool_ops.get_sset_count(dev, sset); - if (sset == ETH_SS_STATS && ds->drv->get_sset_count) - count += ds->drv->get_sset_count(ds); + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -755,14 +755,14 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, dst->master_ethtool_ops.get_strings(dev, stringset, data); } - if (stringset == ETH_SS_STATS && ds->drv->get_strings) { + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { ndata = data + mcount * len; /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->drv->get_strings(ds, cpu_port, ndata); - count = ds->drv->get_sset_count(ds); + ds->ops->get_strings(ds, cpu_port, ndata); + count = ds->ops->get_sset_count(ds); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); @@ -782,8 +782,8 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, data[1] = dev->stats.tx_bytes; data[2] = dev->stats.rx_packets; data[3] = dev->stats.rx_bytes; - if (ds->drv->get_ethtool_stats != NULL) - ds->drv->get_ethtool_stats(ds, p->port, data + 4); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, p->port, data + 4); } static int dsa_slave_get_sset_count(struct net_device *dev, int sset) @@ -795,8 +795,8 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) int count; count = 4; - if (ds->drv->get_sset_count != NULL) - count += ds->drv->get_sset_count(ds); + if (ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -809,8 +809,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_wol) - ds->drv->get_wol(ds, p->port, w); + if (ds->ops->get_wol) + ds->ops->get_wol(ds, p->port, w); } static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) @@ -819,8 +819,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->set_wol) - ret = ds->drv->set_wol(ds, p->port, w); + if (ds->ops->set_wol) + ret = ds->ops->set_wol(ds, p->port, w); return ret; } @@ -831,10 +831,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->set_eee) + if (!ds->ops->set_eee) return -EOPNOTSUPP; - ret = ds->drv->set_eee(ds, p->port, p->phy, e); + ret = ds->ops->set_eee(ds, p->port, p->phy, e); if (ret) return ret; @@ -850,10 +850,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->get_eee) + if (!ds->ops->get_eee) return -EOPNOTSUPP; - ret = ds->drv->get_eee(ds, p->port, e); + ret = ds->ops->get_eee(ds, p->port, e); if (ret) return ret; @@ -988,8 +988,8 @@ static void dsa_slave_adjust_link(struct net_device *dev) p->old_pause = p->phy->pause; } - if (ds->drv->adjust_link && status_changed) - ds->drv->adjust_link(ds, p->port, p->phy); + if (ds->ops->adjust_link && status_changed) + ds->ops->adjust_link(ds, p->port, p->phy); if (status_changed) phy_print_status(p->phy); @@ -1004,8 +1004,8 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, if (dev) { p = netdev_priv(dev); ds = p->parent; - if (ds->drv->fixed_link_update) - ds->drv->fixed_link_update(ds, p->port, status); + if (ds->ops->fixed_link_update) + ds->ops->fixed_link_update(ds, p->port, status); } return 0; @@ -1062,8 +1062,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_dn = port_dn; } - if (ds->drv->get_phy_flags) - phy_flags = ds->drv->get_phy_flags(ds, p->port); + if (ds->ops->get_phy_flags) + phy_flags = ds->ops->get_phy_flags(ds, p->port); if (phy_dn) { int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); -- cgit v1.2.3 From 6bc506b4fb065eac3d89ca1ce37082e174493d9e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:37 +0200 Subject: bridge: switchdev: Add forward mark support for stacked devices switchdev_port_fwd_mark_set() is used to set the 'offload_fwd_mark' of port netdevs so that packets being flooded by the device won't be flooded twice. It works by assigning a unique identifier (the ifindex of the first bridge port) to bridge ports sharing the same parent ID. This prevents packets from being flooded twice by the same switch, but will flood packets through bridge ports belonging to a different switch. This method is problematic when stacked devices are taken into account, such as VLANs. In such cases, a physical port netdev can have upper devices being members in two different bridges, thus requiring two different 'offload_fwd_mark's to be configured on the port netdev, which is impossible. The main problem is that packet and netdev marking is performed at the physical netdev level, whereas flooding occurs between bridge ports, which are not necessarily port netdevs. Instead, packet and netdev marking should really be done in the bridge driver with the switch driver only telling it which packets it already forwarded. The bridge driver will mark such packets using the mark assigned to the ingress bridge port and will prevent the packet from being forwarded through any bridge port sharing the same mark (i.e. having the same parent ID). Remove the current switchdev 'offload_fwd_mark' implementation and instead implement the proposed method. In addition, make rocker - the sole user of the mark - use the proposed method. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 13 ++--- drivers/net/ethernet/rocker/rocker_main.c | 2 +- drivers/net/ethernet/rocker/rocker_ofdpa.c | 4 -- include/linux/netdevice.h | 5 -- include/linux/skbuff.h | 13 ++--- include/net/switchdev.h | 6 --- net/bridge/Makefile | 2 + net/bridge/br_forward.c | 3 +- net/bridge/br_if.c | 10 ++-- net/bridge/br_input.c | 2 + net/bridge/br_private.h | 37 +++++++++++++ net/bridge/br_switchdev.c | 57 ++++++++++++++++++++ net/core/dev.c | 10 ---- net/switchdev/switchdev.c | 85 ------------------------------ 14 files changed, 117 insertions(+), 132 deletions(-) create mode 100644 net/bridge/br_switchdev.c (limited to 'Documentation') diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 31c39115834d..44235e83799b 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -283,15 +283,10 @@ be sent to the port netdev for processing by the bridge driver. The bridge should not reflood the packet to the same ports the device flooded, otherwise there will be duplicate packets on the wire. -To avoid duplicate packets, the device/driver should mark a packet as already -forwarded using skb->offload_fwd_mark. The same mark is set on the device -ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark -is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel -will drop the skb right before transmit on the egress port, with the -understanding that the device already forwarded the packet on same egress port. -The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark -for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and -a group ifindex. +To avoid duplicate packets, the switch driver should mark a packet as already +forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark +the skb using the ingress bridge port's mark and prevent it from being forwarded +through any bridge port with the same mark. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index f0b09b05ed3f..1f0c08602eba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2412,7 +2412,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb->protocol = eth_type_trans(skb, rocker_port->dev); if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) - skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + skb->offload_fwd_mark = 1; rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 1ca796316173..fcad907baecf 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2558,7 +2558,6 @@ static int ofdpa_port_init(struct rocker_port *rocker_port) struct ofdpa_port *ofdpa_port = rocker_port->wpriv; int err; - switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false); rocker_port_set_learning(rocker_port, !!(ofdpa_port->brport_flags & BR_LEARNING)); @@ -2817,7 +2816,6 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex); ofdpa_port->bridge_dev = bridge; - switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true); return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); } @@ -2836,8 +2834,6 @@ static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) ofdpa_port_internal_vlan_id_get(ofdpa_port, ofdpa_port->dev->ifindex); - switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev, - false); ofdpa_port->bridge_dev = NULL; err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794bb0733799..d122be9345c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1562,8 +1562,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @offload_fwd_mark: Offload device fwding mark - * * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1814,9 +1812,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *egress_cl_list; #endif -#ifdef CONFIG_NET_SWITCHDEV - u32 offload_fwd_mark; -#endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7047448e8129..cfb7219be665 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking - * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -730,7 +729,10 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; - /* 3 or 5 bit hole */ +#ifdef CONFIG_NET_SWITCHDEV + __u8 offload_fwd_mark:1; +#endif + /* 2, 4 or 5 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -757,14 +759,9 @@ struct sk_buff { unsigned int sender_cpu; }; #endif - union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; #endif -#ifdef CONFIG_NET_SWITCHDEV - __u32 offload_fwd_mark; -#endif - }; union { __u32 mark; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 62f6a967a1b7..82f5e0462021 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -347,12 +347,6 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return idx; } -static inline void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { diff --git a/net/bridge/Makefile b/net/bridge/Makefile index a1cda5d4718d..0aefc011b668 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o + obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 63a83d8d7da3..32a02de39cd2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING && + nbp_switchdev_allowed_egress(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f2fede05d32c..1da3221845f1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err5; + err = nbp_switchdev_mark_set(p); + if (err) + goto err6; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = nbp_vlan_init(p); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err6; + goto err7; } spin_lock_bh(&br->lock); @@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; -err6: +err7: list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); +err6: netdev_upper_dev_unlink(dev, br->dev); - err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8e486203d133..3132cfc80e9d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -145,6 +145,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; + nbp_switchdev_frame_mark(p, skb); + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aac2a6e6b008..2379b2b865c9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -251,6 +251,9 @@ struct net_bridge_port #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; #endif +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) @@ -359,6 +362,11 @@ struct net_bridge struct timer_list gc_timer; struct kobject *ifobj; u32 auto_cnt; + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; @@ -381,6 +389,10 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; #endif + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -1034,4 +1046,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ +/* br_switchdev.c */ +#ifdef CONFIG_NET_SWITCHDEV +int nbp_switchdev_mark_set(struct net_bridge_port *p); +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb); +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb); +#else +static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + return 0; +} + +static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return true; +} +#endif /* CONFIG_NET_SWITCHDEV */ + #endif diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c new file mode 100644 index 000000000000..f4097b900de1 --- /dev/null +++ b/net/bridge/br_switchdev.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include + +#include "br_private.h" + +static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + + /* dev is yet to be added to the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (switchdev_port_same_parent_id(dev, p->dev)) + return p->offload_fwd_mark; + } + + return ++br->offload_fwd_mark; +} + +int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; + int err; + + ASSERT_RTNL(); + + err = switchdev_port_attr_get(p->dev, &attr); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); + + return 0; +} + +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) + BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; +} + +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return !skb->offload_fwd_mark || + BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; +} diff --git a/net/core/dev.c b/net/core/dev.c index 7feae74ca928..1d5c6dda1988 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); -#ifdef CONFIG_NET_SWITCHDEV - /* Don't forward if offload device already forwarded */ - if (skb->offload_fwd_mark && - skb->offload_fwd_mark == dev->offload_fwd_mark) { - consume_skb(skb); - rc = NET_XMIT_SUCCESS; - goto out; - } -#endif - txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 2c683f24d557..1031a0327fff 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1305,88 +1305,3 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - -static u32 switchdev_port_fwd_mark_get(struct net_device *dev, - struct net_device *group_dev) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev == dev) - continue; - if (switchdev_port_same_parent_id(dev, lower_dev)) - return lower_dev->offload_fwd_mark; - return switchdev_port_fwd_mark_get(dev, lower_dev); - } - - return dev->ifindex; -} - -static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, - u32 old_mark, u32 *reset_mark) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev->offload_fwd_mark == old_mark) { - if (!*reset_mark) - *reset_mark = lower_dev->ifindex; - lower_dev->offload_fwd_mark = *reset_mark; - } - switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); - } -} - -/** - * switchdev_port_fwd_mark_set - Set port offload forwarding mark - * - * @dev: port device - * @group_dev: containing device - * @joining: true if dev is joining group; false if leaving group - * - * An ungrouped port's offload mark is just its ifindex. A grouped - * port's (member of a bridge, for example) offload mark is the ifindex - * of one of the ports in the group with the same parent (switch) ID. - * Ports on the same device in the same group will have the same mark. - * - * Example: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=5 - * sw2p2 ifindex=5 mark=5 - * - * If sw2p2 leaves the bridge, we'll have: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=4 - * sw2p2 ifindex=5 mark=5 - */ -void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ - u32 mark = dev->ifindex; - u32 reset_mark = 0; - - if (group_dev) { - ASSERT_RTNL(); - if (joining) - mark = switchdev_port_fwd_mark_get(dev, group_dev); - else if (dev->offload_fwd_mark == mark) - /* Ohoh, this port was the mark reference port, - * but it's leaving the group, so reset the - * mark for the remaining ports in the group. - */ - switchdev_port_fwd_mark_reset(group_dev, mark, - &reset_mark); - } - - dev->offload_fwd_mark = mark; -} -EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); -- cgit v1.2.3 From 8324f0bcfbfc645cf248e4b93ab58341b7d3b135 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 09:49:29 +0100 Subject: rxrpc: Provide a way for AFS to ask for the peer address of a call Provide a function so that kernel users, such as AFS, can ask for the peer address of a call: void rxrpc_kernel_get_peer(struct rxrpc_call *call, struct sockaddr_rxrpc *_srx); In the future the kernel service won't get sk_buffs to look inside. Further, this allows us to hide any canonicalisation inside AF_RXRPC for when IPv6 support is added. Also propagate this through to afs_find_server() and issue a warning if we can't handle the address family yet. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 7 +++++++ fs/afs/cmservice.c | 20 +++++++++++--------- fs/afs/internal.h | 5 ++++- fs/afs/rxrpc.c | 2 +- fs/afs/server.c | 11 ++++++++--- include/net/af_rxrpc.h | 2 ++ net/rxrpc/peer_object.c | 15 +++++++++++++++ 7 files changed, 48 insertions(+), 14 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 70c926ae212d..dfe0b008df74 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -868,6 +868,13 @@ The kernel interface functions are as follows: This is used to allocate a null RxRPC key that can be used to indicate anonymous security for a particular domain. + (*) Get the peer address of a call. + + void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx); + + This is used to find the remote peer address of a call. + ======================= CONFIGURABLE PARAMETERS diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index ca32d891bbc3..77ee481059ac 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -167,9 +167,9 @@ static void SRXAFSCB_CallBack(struct work_struct *work) static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_callback *cb; struct afs_server *server; - struct in_addr addr; __be32 *bp; u32 tmp; int ret, loop; @@ -178,6 +178,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, switch (call->unmarshall) { case 0: + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); call->offset = 0; call->unmarshall++; @@ -282,8 +283,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -314,12 +314,14 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; int ret; _enter(",{%u},%d", skb->len, last); + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + ret = afs_data_complete(call, skb, last); if (ret < 0) return ret; @@ -329,8 +331,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -347,11 +348,13 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; _enter(",{%u},%d", skb->len, last); + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + /* There are some arguments that we ignore */ afs_data_consumed(call, skb); if (!last) @@ -362,8 +365,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index df976b2a7f40..d97552de9c59 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "afs.h" #include "afs_vl.h" @@ -607,6 +608,8 @@ extern void afs_proc_cell_remove(struct afs_cell *); /* * rxrpc.c */ +extern struct socket *afs_socket; + extern int afs_open_socket(void); extern void afs_close_socket(void); extern void afs_data_consumed(struct afs_call *, struct sk_buff *); @@ -654,7 +657,7 @@ do { \ extern struct afs_server *afs_lookup_server(struct afs_cell *, const struct in_addr *); -extern struct afs_server *afs_find_server(const struct in_addr *); +extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *); extern void afs_put_server(struct afs_server *); extern void __exit afs_purge_servers(void); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 14d04c848465..a1916750e2f9 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -16,7 +16,7 @@ #include "internal.h" #include "afs_cm.h" -static struct socket *afs_socket; /* my RxRPC socket */ +struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; static atomic_t afs_outstanding_calls; static atomic_t afs_outstanding_skbs; diff --git a/fs/afs/server.c b/fs/afs/server.c index f342acf3547d..d4066ab7dd55 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -178,13 +178,18 @@ server_in_two_cells: /* * look up a server by its IP address */ -struct afs_server *afs_find_server(const struct in_addr *_addr) +struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) { struct afs_server *server = NULL; struct rb_node *p; - struct in_addr addr = *_addr; + struct in_addr addr = srx->transport.sin.sin_addr; - _enter("%pI4", &addr.s_addr); + _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr); + + if (srx->transport.family != AF_INET) { + WARN(true, "AFS does not yes support non-IPv4 addresses\n"); + return NULL; + } read_lock(&afs_servers_lock); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 7b0f88699b25..f9224e835d43 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -49,5 +49,7 @@ int rxrpc_kernel_get_error_number(struct sk_buff *); void rxrpc_kernel_free_skb(struct sk_buff *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); int rxrpc_kernel_reject_call(struct socket *); +void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, + struct sockaddr_rxrpc *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 538e9831c699..aebc73ac16dc 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -313,3 +313,18 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) kfree_rcu(peer, rcu); } + +/** + * rxrpc_kernel_get_peer - Get the peer address of a call + * @sock: The socket on which the call is in progress. + * @call: The call to query + * @_srx: Where to place the result + * + * Get the address of the remote peer in a call. + */ +void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx) +{ + *_srx = call->peer->srx; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); -- cgit v1.2.3 From 4de48af663d88d8c9a2550e60725f5a5c660970b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 12:00:48 +0100 Subject: rxrpc: Pass struct socket * to more rxrpc kernel interface functions Pass struct socket * to more rxrpc kernel interface functions. They should be starting from this rather than the socket pointer in the rxrpc_call struct if they need to access the socket. I have left: rxrpc_kernel_is_data_last() rxrpc_kernel_get_abort_code() rxrpc_kernel_get_error_number() rxrpc_kernel_free_skb() rxrpc_kernel_data_consumed() unmodified as they're all about to be removed (and, in any case, don't touch the socket). Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 11 ++++++++--- fs/afs/rxrpc.c | 26 +++++++++++++++----------- include/net/af_rxrpc.h | 10 +++++++--- net/rxrpc/af_rxrpc.c | 5 +++-- net/rxrpc/output.c | 20 +++++++++++--------- 5 files changed, 44 insertions(+), 28 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index dfe0b008df74..cfc8cb91452f 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -725,7 +725,8 @@ The kernel interface functions are as follows: (*) End a client call. - void rxrpc_kernel_end_call(struct rxrpc_call *call); + void rxrpc_kernel_end_call(struct socket *sock, + struct rxrpc_call *call); This is used to end a previously begun call. The user_call_ID is expunged from AF_RXRPC's knowledge and will not be seen again in association with @@ -733,7 +734,9 @@ The kernel interface functions are as follows: (*) Send data through a call. - int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, + int rxrpc_kernel_send_data(struct socket *sock, + struct rxrpc_call *call, + struct msghdr *msg, size_t len); This is used to supply either the request part of a client call or the @@ -747,7 +750,9 @@ The kernel interface functions are as follows: (*) Abort a call. - void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code); + void rxrpc_kernel_abort_call(struct socket *sock, + struct rxrpc_call *call, + u32 abort_code); This is used to abort a call if it's still in an abortable state. The abort code specified will be placed in the ABORT message sent. diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a1916750e2f9..7b0d18900f50 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -207,7 +207,7 @@ static void afs_free_call(struct afs_call *call) static void afs_end_call_nofree(struct afs_call *call) { if (call->rxcall) { - rxrpc_kernel_end_call(call->rxcall); + rxrpc_kernel_end_call(afs_socket, call->rxcall); call->rxcall = NULL; } if (call->type->destructor) @@ -325,8 +325,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, * returns from sending the request */ if (first + loop >= last) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(call->rxcall, msg, - to - offset); + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, to - offset); kunmap(pages[loop]); if (ret < 0) break; @@ -406,7 +406,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, * request */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); + ret = rxrpc_kernel_send_data(afs_socket, rxcall, + &msg, call->request_size); if (ret < 0) goto error_do_abort; @@ -421,7 +422,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); error_kill_call: @@ -509,7 +510,8 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; do_abort: - rxrpc_kernel_abort_call(call->rxcall, + rxrpc_kernel_abort_call(afs_socket, + call->rxcall, abort_code); call->error = ret; call->state = AFS_CALL_ERROR; @@ -605,7 +607,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* kill the call */ if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); - rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); } @@ -823,14 +825,15 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) { + switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) { case 0: _leave(" [replied]"); return; case -ENOMEM: _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT); default: afs_end_call(call); _leave(" [error]"); @@ -859,7 +862,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - n = rxrpc_kernel_send_data(call->rxcall, &msg, len); + n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len); if (n >= 0) { /* Success */ _leave(" [replied]"); @@ -868,7 +871,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT); } afs_end_call(call); _leave(" [error]"); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f9224e835d43..f8d8079dc058 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,9 @@ #include #include +struct key; +struct sock; +struct socket; struct rxrpc_call; /* @@ -39,10 +42,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct key *, unsigned long, gfp_t); -int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t); +int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, + struct msghdr *, size_t); void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); -void rxrpc_kernel_abort_call(struct rxrpc_call *, u32); -void rxrpc_kernel_end_call(struct rxrpc_call *); +void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); +void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_is_data_last(struct sk_buff *); u32 rxrpc_kernel_get_abort_code(struct sk_buff *); int rxrpc_kernel_get_error_number(struct sk_buff *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c7cf356b42b8..e07c91acd904 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -279,15 +279,16 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using + * @sock: The socket the call is on * @call: The call to end * * Allow a kernel service to end a call it was using. The call must be * complete before this is called (the call should be aborted if necessary). */ -void rxrpc_kernel_end_call(struct rxrpc_call *call) +void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - rxrpc_remove_user_ID(call->socket, call); + rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); rxrpc_put_call(call); } EXPORT_SYMBOL(rxrpc_kernel_end_call); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 888fa87ed1d6..b1e708a12151 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -239,6 +239,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /** * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on * @call: The call to send data through * @msg: The data to send * @len: The amount of data to send @@ -248,8 +249,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) * nor should an address be supplied. MSG_MORE should be flagged if there's * more data to come, otherwise this data will end the transmission phase. */ -int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, - size_t len) +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len) { int ret; @@ -258,7 +259,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); - lock_sock(&call->socket->sk); + lock_sock(sock->sk); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -270,35 +271,36 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - ret = rxrpc_send_data(call->socket, call, msg, len); + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); } - release_sock(&call->socket->sk); + release_sock(sock->sk); _leave(" = %d", ret); return ret; } - EXPORT_SYMBOL(rxrpc_kernel_send_data); /** * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * * Allow a kernel service to abort a call, if it's still in an abortable state. */ -void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code) +void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code) { _enter("{%d},%d", call->debug_id, abort_code); - lock_sock(&call->socket->sk); + lock_sock(sock->sk); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); rxrpc_send_abort(call, abort_code); - release_sock(&call->socket->sk); + release_sock(sock->sk); _leave(""); } -- cgit v1.2.3 From 8df3025520aaeba36aba867a4851f8968ac65b4d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 31 Aug 2016 11:50:03 -0400 Subject: net: dsa: add MDB support Add SWITCHDEV_OBJ_ID_PORT_MDB support to the DSA layer. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 23 +++++++++++++++ include/net/dsa.h | 16 +++++++++++ net/dsa/slave.c | 55 ++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index a4e55c76d371..6d6c07cf1a9a 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -584,6 +584,29 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device. function that the driver has to call for each MAC address known to be behind the given port. A switchdev object is used to carry the VID and FDB info. +- port_mdb_prepare: bridge layer function invoked when the bridge prepares the + installation of a multicast database entry. If the operation is not supported, + this function should return -EOPNOTSUPP to inform the bridge code to fallback + to a software implementation. No hardware setup must be done in this function. + See port_fdb_add for this and details. + +- port_mdb_add: bridge layer function invoked when the bridge wants to install + a multicast database entry, the switch hardware should be programmed with the + specified address in the specified VLAN ID in the forwarding database + associated with this VLAN ID. + +Note: VLAN ID 0 corresponds to the port private database, which, in the context +of DSA, would be the its port-based VLAN, used by the associated bridge device. + +- port_mdb_del: bridge layer function invoked when the bridge wants to remove a + multicast database entry, the switch hardware should be programmed to delete + the specified MAC address from the specified VLAN ID if it was mapped into + this port forwarding database. + +- port_mdb_dump: bridge layer function invoked with a switchdev callback + function that the driver has to call for each MAC address known to be behind + the given port. A switchdev object is used to carry the VID and MDB info. + TODO ==== diff --git a/include/net/dsa.h b/include/net/dsa.h index 2ebeba44a461..e3eb230b970d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -234,6 +234,7 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) struct switchdev_trans; struct switchdev_obj; struct switchdev_obj_port_fdb; +struct switchdev_obj_port_mdb; struct switchdev_obj_port_vlan; struct dsa_switch_ops { @@ -369,6 +370,21 @@ struct dsa_switch_ops { int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)); + + /* + * Multicast database + */ + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + int (*port_mdb_del)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + int (*port_mdb_dump)(struct dsa_switch *ds, int port, + struct switchdev_obj_port_mdb *mdb, + int (*cb)(struct switchdev_obj *obj)); }; void register_switch_driver(struct dsa_switch_ops *type); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9f6c2a20f6ff..9ecbe787f102 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -290,6 +290,50 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, return -EOPNOTSUPP; } +static int dsa_slave_port_mdb_add(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + return ds->ops->port_mdb_prepare(ds, p->port, mdb, trans); + } + + ds->ops->port_mdb_add(ds, p->port, mdb, trans); + + return 0; +} + +static int dsa_slave_port_mdb_del(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_del) + return ds->ops->port_mdb_del(ds, p->port, mdb); + + return -EOPNOTSUPP; +} + +static int dsa_slave_port_mdb_dump(struct net_device *dev, + struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_dump) + return ds->ops->port_mdb_dump(ds, p->port, mdb, cb); + + return -EOPNOTSUPP; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -412,6 +456,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_add(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), @@ -435,6 +483,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, err = dsa_slave_port_fdb_del(dev, SWITCHDEV_OBJ_PORT_FDB(obj)); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_del(dev, SWITCHDEV_OBJ_PORT_VLAN(obj)); @@ -459,6 +510,10 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), cb); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + cb); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_dump(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), -- cgit v1.2.3 From 99abf9d6641e7857df67faa2ff7e4044cdea9683 Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Mon, 29 Aug 2016 18:23:39 +0200 Subject: Documentation: Bindings: Add STM32 DWMAC glue Acked-by: Rob Herring Signed-off-by: Alexandre TORGUE Signed-off-by: David S. Miller --- .../devicetree/bindings/net/stm32-dwmac.txt | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/stm32-dwmac.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt new file mode 100644 index 000000000000..c35afb7e956a --- /dev/null +++ b/Documentation/devicetree/bindings/net/stm32-dwmac.txt @@ -0,0 +1,32 @@ +STMicroelectronics STM32 / MCU DWMAC glue layer controller + +This file documents platform glue layer for stmmac. +Please see stmmac.txt for the other unchanged properties. + +The device node has following properties. + +Required properties: +- compatible: Should be "st,stm32-dwmac" to select glue, and + "snps,dwmac-3.50a" to select IP version. +- clocks: Must contain a phandle for each entry in clock-names. +- clock-names: Should be "stmmaceth" for the host clock. + Should be "mac-clk-tx" for the MAC TX clock. + Should be "mac-clk-rx" for the MAC RX clock. +- st,syscon : Should be phandle/offset pair. The phandle to the syscon node which + encompases the glue register, and the offset of the control register. +Example: + + ethernet@40028000 { + compatible = "st,stm32-dwmac", "snps,dwmac-3.50a"; + status = "disabled"; + reg = <0x40028000 0x8000>; + reg-names = "stmmaceth"; + interrupts = <0 61 0>, <0 62 0>; + interrupt-names = "macirq", "eth_wake_irq"; + clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx"; + clocks = <&rcc 0 25>, <&rcc 0 26>, <&rcc 0 27>; + st,syscon = <&syscfg 0x4>; + snps,pbl = <8>; + snps,mixed-burst; + dma-ranges; + }; -- cgit v1.2.3 From d001648ec7cf8b21ae9eec8b9ba4a18295adfb14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 20:42:14 +0100 Subject: rxrpc: Don't expose skbs to in-kernel users [ver #2] Don't expose skbs to in-kernel users, such as the AFS filesystem, but instead provide a notification hook the indicates that a call needs attention and another that indicates that there's a new call to be collected. This makes the following possibilities more achievable: (1) Call refcounting can be made simpler if skbs don't hold refs to calls. (2) skbs referring to non-data events will be able to be freed much sooner rather than being queued for AFS to pick up as rxrpc_kernel_recv_data will be able to consult the call state. (3) We can shortcut the receive phase when a call is remotely aborted because we don't have to go through all the packets to get to the one cancelling the operation. (4) It makes it easier to do encryption/decryption directly between AFS's buffers and sk_buffs. (5) Encryption/decryption can more easily be done in the AFS's thread contexts - usually that of the userspace process that issued a syscall - rather than in one of rxrpc's background threads on a workqueue. (6) AFS will be able to wait synchronously on a call inside AF_RXRPC. To make this work, the following interface function has been added: int rxrpc_kernel_recv_data( struct socket *sock, struct rxrpc_call *call, void *buffer, size_t bufsize, size_t *_offset, bool want_more, u32 *_abort_code); This is the recvmsg equivalent. It allows the caller to find out about the state of a specific call and to transfer received data into a buffer piecemeal. afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction logic between them. They don't wait synchronously yet because the socket lock needs to be dealt with. Five interface functions have been removed: rxrpc_kernel_is_data_last() rxrpc_kernel_get_abort_code() rxrpc_kernel_get_error_number() rxrpc_kernel_free_skb() rxrpc_kernel_data_consumed() As a temporary hack, sk_buffs going to an in-kernel call are queued on the rxrpc_call struct (->knlrecv_queue) rather than being handed over to the in-kernel user. To process the queue internally, a temporary function, temp_deliver_data() has been added. This will be replaced with common code between the rxrpc_recvmsg() path and the kernel_rxrpc_recv_data() path in a future patch. Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 72 +++--- fs/afs/cmservice.c | 142 ++++++------ fs/afs/fsclient.c | 148 ++++++------- fs/afs/internal.h | 33 +-- fs/afs/rxrpc.c | 439 +++++++++++++------------------------ fs/afs/vlclient.c | 7 +- include/net/af_rxrpc.h | 35 +-- net/rxrpc/af_rxrpc.c | 29 +-- net/rxrpc/ar-internal.h | 23 +- net/rxrpc/call_accept.c | 13 +- net/rxrpc/call_object.c | 5 +- net/rxrpc/conn_event.c | 1 - net/rxrpc/input.c | 10 +- net/rxrpc/output.c | 2 +- net/rxrpc/recvmsg.c | 191 +++++++++++++--- net/rxrpc/skbuff.c | 1 - 16 files changed, 565 insertions(+), 586 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index cfc8cb91452f..1b63bbc6b94f 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -748,6 +748,37 @@ The kernel interface functions are as follows: The msg must not specify a destination address, control data or any flags other than MSG_MORE. len is the total amount of data to transmit. + (*) Receive data from a call. + + int rxrpc_kernel_recv_data(struct socket *sock, + struct rxrpc_call *call, + void *buf, + size_t size, + size_t *_offset, + bool want_more, + u32 *_abort) + + This is used to receive data from either the reply part of a client call + or the request part of a service call. buf and size specify how much + data is desired and where to store it. *_offset is added on to buf and + subtracted from size internally; the amount copied into the buffer is + added to *_offset before returning. + + want_more should be true if further data will be required after this is + satisfied and false if this is the last item of the receive phase. + + There are three normal returns: 0 if the buffer was filled and want_more + was true; 1 if the buffer was filled, the last DATA packet has been + emptied and want_more was false; and -EAGAIN if the function needs to be + called again. + + If the last DATA packet is processed but the buffer contains less than + the amount requested, EBADMSG is returned. If want_more wasn't set, but + more data was available, EMSGSIZE is returned. + + If a remote ABORT is detected, the abort code received will be stored in + *_abort and ECONNABORTED will be returned. + (*) Abort a call. void rxrpc_kernel_abort_call(struct socket *sock, @@ -825,47 +856,6 @@ The kernel interface functions are as follows: Other errors may be returned if the call had been aborted (-ECONNABORTED) or had timed out (-ETIME). - (*) Record the delivery of a data message. - - void rxrpc_kernel_data_consumed(struct rxrpc_call *call, - struct sk_buff *skb); - - This is used to record a data message as having been consumed and to - update the ACK state for the call. The message must still be passed to - rxrpc_kernel_free_skb() for disposal by the caller. - - (*) Free a message. - - void rxrpc_kernel_free_skb(struct sk_buff *skb); - - This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC - socket. - - (*) Determine if a data message is the last one on a call. - - bool rxrpc_kernel_is_data_last(struct sk_buff *skb); - - This is used to determine if a socket buffer holds the last data message - to be received for a call (true will be returned if it does, false - if not). - - The data message will be part of the reply on a client call and the - request on an incoming call. In the latter case there will be more - messages, but in the former case there will not. - - (*) Get the abort code from an abort message. - - u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb); - - This is used to extract the abort code from a remote abort message. - - (*) Get the error number from a local or network error message. - - int rxrpc_kernel_get_error_number(struct sk_buff *skb); - - This is used to extract the error number from a message indicating either - a local error occurred or a network error occurred. - (*) Allocate a null key for doing anonymous security. struct key *rxrpc_get_null_key(const char *keyname); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 77ee481059ac..2037e7a77a37 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -17,15 +17,12 @@ #include "internal.h" #include "afs_cm.h" -static int afs_deliver_cb_init_call_back_state(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_init_call_back_state3(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, - struct sk_buff *, bool); +static int afs_deliver_cb_init_call_back_state(struct afs_call *); +static int afs_deliver_cb_init_call_back_state3(struct afs_call *); +static int afs_deliver_cb_probe(struct afs_call *); +static int afs_deliver_cb_callback(struct afs_call *); +static int afs_deliver_cb_probe_uuid(struct afs_call *); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *); static void afs_cm_destructor(struct afs_call *); /* @@ -130,7 +127,7 @@ static void afs_cm_destructor(struct afs_call *call) * received. The step number here must match the final number in * afs_deliver_cb_callback(). */ - if (call->unmarshall == 6) { + if (call->unmarshall == 5) { ASSERT(call->server && call->count && call->request); afs_break_callbacks(call->server, call->count, call->request); } @@ -164,8 +161,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work) /* * deliver request data to a CB.CallBack call */ -static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_callback(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_callback *cb; @@ -174,7 +170,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, u32 tmp; int ret, loop; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -185,7 +181,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* extract the FID array and its count in two steps */ case 1: _debug("extract FID count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -202,8 +198,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 2: _debug("extract FID array"); - ret = afs_extract_data(call, skb, last, call->buffer, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, true); if (ret < 0) return ret; @@ -229,7 +225,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* extract the callback array and its count in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -239,13 +235,11 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, return -EBADMSG; call->offset = 0; call->unmarshall++; - if (tmp == 0) - goto empty_cb_array; case 4: _debug("extract CB array"); - ret = afs_extract_data(call, skb, last, call->request, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, false); if (ret < 0) return ret; @@ -258,15 +252,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, cb->type = ntohl(*bp++); } - empty_cb_array: call->offset = 0; call->unmarshall++; - case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - /* Record that the message was unmarshalled successfully so * that the call destructor can know do the callback breaking * work, even if the final ACK isn't received. @@ -275,7 +263,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, * updated also. */ call->unmarshall++; - case 6: + case 5: break; } @@ -310,19 +298,17 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) /* * deliver request data to a CB.InitCallBackState call */ -static int afs_deliver_cb_init_call_back_state(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; @@ -344,21 +330,61 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* * deliver request data to a CB.InitCallBackState3 call */ -static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); - /* There are some arguments that we ignore */ - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + call->unmarshall++; + + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->offset = 0; + call->unmarshall++; + + case 2: + break; + } /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -390,14 +416,13 @@ static void SRXAFSCB_Probe(struct work_struct *work) /* * deliver request data to a CB.Probe call */ -static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_probe(struct afs_call *call) { int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; @@ -435,19 +460,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) /* * deliver request data to a CB.ProbeUuid call */ -static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_probe_uuid(struct afs_call *call) { struct afs_uuid *r; unsigned loop; __be32 *b; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -459,8 +479,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, case 1: _debug("extract UUID"); - ret = afs_extract_data(call, skb, last, call->buffer, - 11 * sizeof(__be32)); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -487,16 +507,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, call->unmarshall++; case 2: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; break; } - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - call->state = AFS_CALL_REPLYING; INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); @@ -570,14 +583,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) /* * deliver request data to a CB.TellMeAboutYourself call */ -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) { int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 9312b92e54be..96f4d764d1a6 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -235,16 +235,15 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, /* * deliver reply data to an FS.FetchStatus */ -static int afs_deliver_fs_fetch_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_status(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -307,8 +306,7 @@ int afs_fs_fetch_file_status(struct afs_server *server, /* * deliver reply data to an FS.FetchData */ -static int afs_deliver_fs_fetch_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; @@ -316,7 +314,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, void *buffer; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -332,7 +330,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, * client) */ case 1: _debug("extract data length (MSW)"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -347,7 +345,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the returned data length */ case 2: _debug("extract data length"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -363,10 +361,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, _debug("extract data"); if (call->count > 0) { page = call->reply3; - buffer = kmap_atomic(page); - ret = afs_extract_data(call, skb, last, buffer, - call->count); - kunmap_atomic(buffer); + buffer = kmap(page); + ret = afs_extract_data(call, buffer, + call->count, true); + kunmap(buffer); if (ret < 0) return ret; } @@ -376,8 +374,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the metadata */ case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - (21 + 3 + 6) * 4); + ret = afs_extract_data(call, call->buffer, + (21 + 3 + 6) * 4, false); if (ret < 0) return ret; @@ -391,18 +389,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } if (call->count < PAGE_SIZE) { _debug("clear"); page = call->reply3; - buffer = kmap_atomic(page); + buffer = kmap(page); memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap_atomic(buffer); + kunmap(buffer); } _leave(" = 0 [done]"); @@ -515,13 +510,12 @@ int afs_fs_fetch_data(struct afs_server *server, /* * deliver reply data to an FS.GiveUpCallBacks */ -static int afs_deliver_fs_give_up_callbacks(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_give_up_callbacks(struct afs_call *call) { - _enter(",{%u},%d", skb->len, last); + _enter(""); /* shouldn't be any reply data */ - return afs_data_complete(call, skb, last); + return afs_extract_data(call, NULL, 0, false); } /* @@ -599,16 +593,15 @@ int afs_fs_give_up_callbacks(struct afs_server *server, /* * deliver reply data to an FS.CreateFile or an FS.MakeDir */ -static int afs_deliver_fs_create_vnode(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_create_vnode(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -696,16 +689,15 @@ int afs_fs_create(struct afs_server *server, /* * deliver reply data to an FS.RemoveFile or FS.RemoveDir */ -static int afs_deliver_fs_remove(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_remove(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -777,16 +769,15 @@ int afs_fs_remove(struct afs_server *server, /* * deliver reply data to an FS.Link */ -static int afs_deliver_fs_link(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_link(struct afs_call *call) { struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -863,16 +854,15 @@ int afs_fs_link(struct afs_server *server, /* * deliver reply data to an FS.Symlink */ -static int afs_deliver_fs_symlink(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_symlink(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -968,16 +958,15 @@ int afs_fs_symlink(struct afs_server *server, /* * deliver reply data to an FS.Rename */ -static int afs_deliver_fs_rename(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_rename(struct afs_call *call) { struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1072,16 +1061,15 @@ int afs_fs_rename(struct afs_server *server, /* * deliver reply data to an FS.StoreData */ -static int afs_deliver_fs_store_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1251,17 +1239,16 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, /* * deliver reply data to an FS.StoreStatus */ -static int afs_deliver_fs_store_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_status(struct afs_call *call) { afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1443,14 +1430,13 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, /* * deliver reply data to an FS.GetVolumeStatus */ -static int afs_deliver_fs_get_volume_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_get_volume_status(struct afs_call *call) { const __be32 *bp; char *p; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -1460,8 +1446,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the returned status record */ case 1: _debug("extract status"); - ret = afs_extract_data(call, skb, last, call->buffer, - 12 * 4); + ret = afs_extract_data(call, call->buffer, + 12 * 4, true); if (ret < 0) return ret; @@ -1472,7 +1458,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the volume name length */ case 2: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1487,8 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 3: _debug("extract volname"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1508,8 +1494,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1519,7 +1505,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the offline message length */ case 5: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1534,8 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 6: _debug("extract offline"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1555,8 +1541,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 7: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1566,7 +1552,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the message of the day length */ case 8: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1581,8 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 9: _debug("extract motd"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1595,26 +1581,17 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->unmarshall++; /* extract the message of the day padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_motd_padding; - } - call->count = 4 - (call->count & 3); + call->count = (4 - (call->count & 3)) & 3; case 10: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, false); if (ret < 0) return ret; call->offset = 0; call->unmarshall++; - no_motd_padding: - case 11: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } @@ -1685,15 +1662,14 @@ int afs_fs_get_volume_status(struct afs_server *server, /* * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock */ -static int afs_deliver_fs_xxxx_lock(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_xxxx_lock(struct afs_call *call) { const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d97552de9c59..5497c8496055 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +56,7 @@ struct afs_mount_params { */ struct afs_wait_mode { /* RxRPC received message notification */ - void (*rx_wakeup)(struct afs_call *call); + rxrpc_notify_rx_t notify_rx; /* synchronous call waiter and call dispatched notification */ int (*wait)(struct afs_call *call); @@ -76,10 +75,8 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ - void (*async_workfn)(struct afs_call *call); /* asynchronous work function */ struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ - struct sk_buff_head rx_queue; /* received packets */ struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_server *server; /* server affected by incoming CM call */ @@ -93,6 +90,7 @@ struct afs_call { void *reply4; /* reply buffer (fourth part) */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ + size_t offset; /* offset into received data store */ enum { /* call state */ AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ @@ -100,21 +98,18 @@ struct afs_call { AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */ AFS_CALL_REPLYING, /* replying to incoming call */ AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ - AFS_CALL_COMPLETE, /* successfully completed */ - AFS_CALL_BUSY, /* server was busy */ - AFS_CALL_ABORTED, /* call was aborted */ - AFS_CALL_ERROR, /* call failed due to error */ + AFS_CALL_COMPLETE, /* Completed or failed */ } state; int error; /* error code */ + u32 abort_code; /* Remote abort ID or 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ - unsigned reply_size; /* current size of reply */ unsigned first_offset; /* offset into mapping[first] */ unsigned last_to; /* amount of mapping[last] */ - unsigned offset; /* offset into received data store */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ + bool need_attention; /* T if RxRPC poked us */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ __be32 operation_ID; /* operation ID for an incoming call */ @@ -129,8 +124,7 @@ struct afs_call_type { /* deliver request or reply data to an call * - returning an error will cause the call to be aborted */ - int (*deliver)(struct afs_call *call, struct sk_buff *skb, - bool last); + int (*deliver)(struct afs_call *call); /* map an abort code to an error number */ int (*abort_to_error)(u32 abort_code); @@ -612,27 +606,18 @@ extern struct socket *afs_socket; extern int afs_open_socket(void); extern void afs_close_socket(void); -extern void afs_data_consumed(struct afs_call *, struct sk_buff *); extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, const struct afs_wait_mode *); extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); -extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); -extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, - size_t); +extern int afs_extract_data(struct afs_call *, void *, size_t, bool); -static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb, - bool last) +static inline int afs_transfer_reply(struct afs_call *call) { - if (skb->len > 0) - return -EBADMSG; - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - return 0; + return afs_extract_data(call, call->buffer, call->reply_max, false); } /* diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 7b0d18900f50..244896baf241 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -19,31 +19,31 @@ struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; static atomic_t afs_outstanding_calls; -static atomic_t afs_outstanding_skbs; -static void afs_wake_up_call_waiter(struct afs_call *); +static void afs_free_call(struct afs_call *); +static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static int afs_wait_for_call_to_complete(struct afs_call *); -static void afs_wake_up_async_call(struct afs_call *); +static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static int afs_dont_wait_for_call_to_complete(struct afs_call *); -static void afs_process_async_call(struct afs_call *); -static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *); -static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool); +static void afs_process_async_call(struct work_struct *); +static void afs_rx_new_call(struct sock *); +static int afs_deliver_cm_op_id(struct afs_call *); /* synchronous call management */ const struct afs_wait_mode afs_sync_call = { - .rx_wakeup = afs_wake_up_call_waiter, + .notify_rx = afs_wake_up_call_waiter, .wait = afs_wait_for_call_to_complete, }; /* asynchronous call management */ const struct afs_wait_mode afs_async_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, .wait = afs_dont_wait_for_call_to_complete, }; /* asynchronous incoming call management */ static const struct afs_wait_mode afs_async_incoming_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, }; /* asynchronous incoming call initial processing */ @@ -55,16 +55,8 @@ static const struct afs_call_type afs_RXCMxxxx = { static void afs_collect_incoming_call(struct work_struct *); -static struct sk_buff_head afs_incoming_calls; static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); -static void afs_async_workfn(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - call->async_workfn(call); -} - static int afs_wait_atomic_t(atomic_t *p) { schedule(); @@ -83,8 +75,6 @@ int afs_open_socket(void) _enter(""); - skb_queue_head_init(&afs_incoming_calls); - ret = -ENOMEM; afs_async_calls = create_singlethread_workqueue("kafsd"); if (!afs_async_calls) @@ -110,12 +100,12 @@ int afs_open_socket(void) if (ret < 0) goto error_2; + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call); + ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; - rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor); - afs_socket = socket; _leave(" = 0"); return 0; @@ -136,51 +126,19 @@ void afs_close_socket(void) { _enter(""); + _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); + flush_workqueue(afs_async_calls); sock_release(afs_socket); _debug("dework"); destroy_workqueue(afs_async_calls); - - ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0); _leave(""); } -/* - * Note that the data in a socket buffer is now consumed. - */ -void afs_data_consumed(struct afs_call *call, struct sk_buff *skb) -{ - if (!skb) { - _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("DLVR %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - rxrpc_kernel_data_consumed(call->rxcall, skb); - } -} - -/* - * free a socket buffer - */ -static void afs_free_skb(struct sk_buff *skb) -{ - if (!skb) { - _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("FREE %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - if (atomic_dec_return(&afs_outstanding_skbs) == -1) - BUG(); - rxrpc_kernel_free_skb(skb); - } -} - /* * free a call */ @@ -191,7 +149,6 @@ static void afs_free_call(struct afs_call *call) ASSERTCMP(call->rxcall, ==, NULL); ASSERT(!work_pending(&call->async_work)); - ASSERT(skb_queue_empty(&call->rx_queue)); ASSERT(call->type->name != NULL); kfree(call->request); @@ -227,7 +184,7 @@ static void afs_end_call(struct afs_call *call) * allocate a call with flat request and reply buffers */ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, - size_t request_size, size_t reply_size) + size_t request_size, size_t reply_max) { struct afs_call *call; @@ -241,7 +198,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, call->type = type; call->request_size = request_size; - call->reply_max = reply_size; + call->reply_max = reply_max; if (request_size) { call->request = kmalloc(request_size, GFP_NOFS); @@ -249,14 +206,13 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, goto nomem_free; } - if (reply_size) { - call->buffer = kmalloc(reply_size, GFP_NOFS); + if (reply_max) { + call->buffer = kmalloc(reply_max, GFP_NOFS); if (!call->buffer) goto nomem_free; } init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); return call; nomem_free: @@ -354,7 +310,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct msghdr msg; struct kvec iov[1]; int ret; - struct sk_buff *skb; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -366,8 +321,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; @@ -380,7 +334,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, /* create a call */ rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, - (unsigned long) call, gfp); + (unsigned long) call, gfp, + wait_mode->notify_rx); call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -423,150 +378,84 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, error_do_abort: rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); error_kill_call: afs_end_call(call); _leave(" = %d", ret); return ret; } -/* - * Handles intercepted messages that were arriving in the socket's Rx queue. - * - * Called from the AF_RXRPC call processor in waitqueue process context. For - * each call, it is guaranteed this will be called in order of packet to be - * delivered. - */ -static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID, - struct sk_buff *skb) -{ - struct afs_call *call = (struct afs_call *) user_call_ID; - - _enter("%p,,%u", call, skb->mark); - - _debug("ICPT %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - - ASSERTCMP(sk, ==, afs_socket->sk); - atomic_inc(&afs_outstanding_skbs); - - if (!call) { - /* its an incoming call for our callback service */ - skb_queue_tail(&afs_incoming_calls, skb); - queue_work(afs_wq, &afs_collect_incoming_call_work); - } else { - /* route the messages directly to the appropriate call */ - skb_queue_tail(&call->rx_queue, skb); - call->wait_mode->rx_wakeup(call); - } - - _leave(""); -} - /* * deliver messages to a call */ static void afs_deliver_to_call(struct afs_call *call) { - struct sk_buff *skb; - bool last; u32 abort_code; int ret; - _enter(""); - - while ((call->state == AFS_CALL_AWAIT_REPLY || - call->state == AFS_CALL_AWAIT_OP_ID || - call->state == AFS_CALL_AWAIT_REQUEST || - call->state == AFS_CALL_AWAIT_ACK) && - (skb = skb_dequeue(&call->rx_queue))) { - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: - _debug("Rcv DATA"); - last = rxrpc_kernel_is_data_last(skb); - ret = call->type->deliver(call, skb, last); - switch (ret) { - case -EAGAIN: - if (last) { - _debug("short data"); - goto unmarshal_error; - } - break; - case 0: - ASSERT(last); - if (call->state == AFS_CALL_AWAIT_REPLY) - call->state = AFS_CALL_COMPLETE; - break; - case -ENOTCONN: - abort_code = RX_CALL_DEAD; - goto do_abort; - case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; - goto do_abort; - default: - unmarshal_error: - abort_code = RXGEN_CC_UNMARSHAL; - if (call->state != AFS_CALL_AWAIT_REPLY) - abort_code = RXGEN_SS_UNMARSHAL; - do_abort: - rxrpc_kernel_abort_call(afs_socket, - call->rxcall, - abort_code); - call->error = ret; - call->state = AFS_CALL_ERROR; - break; + _enter("%s", call->type->name); + + while (call->state == AFS_CALL_AWAIT_REPLY || + call->state == AFS_CALL_AWAIT_OP_ID || + call->state == AFS_CALL_AWAIT_REQUEST || + call->state == AFS_CALL_AWAIT_ACK + ) { + if (call->state == AFS_CALL_AWAIT_ACK) { + size_t offset = 0; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + NULL, 0, &offset, false, + &call->abort_code); + if (ret == -EINPROGRESS || ret == -EAGAIN) + return; + if (ret == 1) { + call->state = AFS_CALL_COMPLETE; + goto done; } - break; - case RXRPC_SKB_MARK_FINAL_ACK: - _debug("Rcv ACK"); - call->state = AFS_CALL_COMPLETE; - break; - case RXRPC_SKB_MARK_BUSY: - _debug("Rcv BUSY"); - call->error = -EBUSY; - call->state = AFS_CALL_BUSY; - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Rcv ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Loc ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_NET_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv NET ERROR %d", call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv LOCAL ERROR %d", call->error); - break; - default: - BUG(); - break; + return; } - afs_free_skb(skb); - } - - /* make sure the queue is empty if the call is done with (we might have - * aborted the call early because of an unmarshalling error) */ - if (call->state >= AFS_CALL_COMPLETE) { - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); - if (call->incoming) - afs_end_call(call); + ret = call->type->deliver(call); + switch (ret) { + case 0: + if (call->state == AFS_CALL_AWAIT_REPLY) + call->state = AFS_CALL_COMPLETE; + goto done; + case -EINPROGRESS: + case -EAGAIN: + goto out; + case -ENOTCONN: + abort_code = RX_CALL_DEAD; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + case -ENOTSUPP: + abort_code = RX_INVALID_OPERATION; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + case -ENODATA: + case -EBADMSG: + case -EMSGSIZE: + default: + abort_code = RXGEN_CC_UNMARSHAL; + if (call->state != AFS_CALL_AWAIT_REPLY) + abort_code = RXGEN_SS_UNMARSHAL; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + } } +done: + if (call->state == AFS_CALL_COMPLETE && call->incoming) + afs_end_call(call); +out: _leave(""); + return; + +do_abort: + call->error = ret; + call->state = AFS_CALL_COMPLETE; + goto done; } /* @@ -574,7 +463,6 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - struct sk_buff *skb; int ret; DECLARE_WAITQUEUE(myself, current); @@ -586,14 +474,15 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) set_current_state(TASK_INTERRUPTIBLE); /* deliver any messages that are in the queue */ - if (!skb_queue_empty(&call->rx_queue)) { + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); continue; } ret = call->error; - if (call->state >= AFS_CALL_COMPLETE) + if (call->state == AFS_CALL_COMPLETE) break; ret = -EINTR; if (signal_pending(current)) @@ -607,9 +496,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* kill the call */ if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_CALL_DEAD); } _debug("call complete"); @@ -621,17 +509,24 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* * wake up a waiting call */ -static void afs_wake_up_call_waiter(struct afs_call *call) +static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; wake_up(&call->waitq); } /* * wake up an asynchronous call */ -static void afs_wake_up_async_call(struct afs_call *call) +static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { - _enter(""); + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; queue_work(afs_async_calls, &call->async_work); } @@ -649,8 +544,10 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call) /* * delete an asynchronous call */ -static void afs_delete_async_call(struct afs_call *call) +static void afs_delete_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); afs_free_call(call); @@ -660,17 +557,19 @@ static void afs_delete_async_call(struct afs_call *call) /* * perform processing on an asynchronous call - * - on a multiple-thread workqueue this work item may try to run on several - * CPUs at the same time */ -static void afs_process_async_call(struct afs_call *call) +static void afs_process_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); - if (!skb_queue_empty(&call->rx_queue)) + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; afs_deliver_to_call(call); + } - if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) { + if (call->state == AFS_CALL_COMPLETE && call->wait_mode) { if (call->wait_mode->async_complete) call->wait_mode->async_complete(call->reply, call->error); @@ -681,45 +580,13 @@ static void afs_process_async_call(struct afs_call *call) /* we can't just delete the call because the work item may be * queued */ - call->async_workfn = afs_delete_async_call; + call->async_work.func = afs_delete_async_call; queue_work(afs_async_calls, &call->async_work); } _leave(""); } -/* - * Empty a socket buffer into a flat reply buffer. - */ -int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last) -{ - size_t len = skb->len; - - if (len > call->reply_max - call->reply_size) { - _leave(" = -EBADMSG [%zu > %u]", - len, call->reply_max - call->reply_size); - return -EBADMSG; - } - - if (len > 0) { - if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, - len) < 0) - BUG(); - call->reply_size += len; - } - - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } - return 0; -} - /* * accept the backlog of incoming calls */ @@ -727,14 +594,10 @@ static void afs_collect_incoming_call(struct work_struct *work) { struct rxrpc_call *rxcall; struct afs_call *call = NULL; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&afs_incoming_calls))) { - _debug("new call"); - /* don't need the notification */ - afs_free_skb(skb); + _enter(""); + do { if (!call) { call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); if (!call) { @@ -742,12 +605,10 @@ static void afs_collect_incoming_call(struct work_struct *work) return; } - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); call->wait_mode = &afs_async_incoming_call; call->type = &afs_RXCMxxxx; init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); call->state = AFS_CALL_AWAIT_OP_ID; _debug("CALL %p{%s} [%d]", @@ -757,46 +618,47 @@ static void afs_collect_incoming_call(struct work_struct *work) } rxcall = rxrpc_kernel_accept_call(afs_socket, - (unsigned long) call); + (unsigned long)call, + afs_wake_up_async_call); if (!IS_ERR(rxcall)) { call->rxcall = rxcall; + call->need_attention = true; + queue_work(afs_async_calls, &call->async_work); call = NULL; } - } + } while (!call); if (call) afs_free_call(call); } +/* + * Notification of an incoming call. + */ +static void afs_rx_new_call(struct sock *sk) +{ + queue_work(afs_wq, &afs_collect_incoming_call_work); +} + /* * Grab the operation ID from an incoming cache manager call. The socket * buffer is discarded on error or if we don't yet have sufficient data. */ -static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cm_op_id(struct afs_call *call) { - size_t len = skb->len; - void *oibuf = (void *) &call->operation_ID; + int ret; - _enter("{%u},{%zu},%d", call->offset, len, last); + _enter("{%zu}", call->offset); ASSERTCMP(call->offset, <, 4); /* the operation ID forms the first four bytes of the request data */ - len = min_t(size_t, len, 4 - call->offset); - if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0) - BUG(); - if (!pskb_pull(skb, len)) - BUG(); - call->offset += len; - - if (call->offset < 4) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; - } + ret = afs_extract_data(call, &call->operation_ID, 4, true); + if (ret < 0) + return ret; call->state = AFS_CALL_AWAIT_REQUEST; + call->offset = 0; /* ask the cache manager to route the call (it'll change the call type * if successful) */ @@ -805,7 +667,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, /* pass responsibility for the remainer of this message off to the * cache manager op */ - return call->type->deliver(call, skb, last); + return call->type->deliver(call); } /* @@ -881,25 +743,40 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) /* * Extract a piece of data from the received data socket buffers. */ -int afs_extract_data(struct afs_call *call, struct sk_buff *skb, - bool last, void *buf, size_t count) +int afs_extract_data(struct afs_call *call, void *buf, size_t count, + bool want_more) { - size_t len = skb->len; + int ret; - _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count); + _enter("{%s,%zu},,%zu,%d", + call->type->name, call->offset, count, want_more); - ASSERTCMP(call->offset, <, count); + ASSERTCMP(call->offset, <=, count); - len = min_t(size_t, len, count - call->offset); - if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 || - !pskb_pull(skb, len)) - BUG(); - call->offset += len; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + buf, count, &call->offset, + want_more, &call->abort_code); + if (ret == 0 || ret == -EAGAIN) + return ret; - if (call->offset < count) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; + if (ret == 1) { + switch (call->state) { + case AFS_CALL_AWAIT_REPLY: + call->state = AFS_CALL_COMPLETE; + break; + case AFS_CALL_AWAIT_REQUEST: + call->state = AFS_CALL_REPLYING; + break; + default: + break; + } + return 0; } - return 0; + + if (ret == -ECONNABORTED) + call->error = call->type->abort_to_error(call->abort_code); + else + call->error = ret; + call->state = AFS_CALL_COMPLETE; + return ret; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index f94d1abdc3eb..94bcd97d22b8 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -58,17 +58,16 @@ static int afs_vl_abort_to_error(u32 abort_code) /* * deliver reply data to a VL.GetEntryByXXX call */ -static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) { struct afs_cache_vlocation *entry; __be32 *bp; u32 tmp; int loop, ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f8d8079dc058..b4b6a3664dda 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -12,7 +12,6 @@ #ifndef _NET_RXRPC_H #define _NET_RXRPC_H -#include #include struct key; @@ -20,38 +19,26 @@ struct sock; struct socket; struct rxrpc_call; -/* - * the mark applied to socket buffers that may be intercepted - */ -enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ -}; +typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_notify_new_call_t)(struct sock *); -typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long, - struct sk_buff *); -void rxrpc_kernel_intercept_rx_messages(struct socket *, rxrpc_interceptor_t); +void rxrpc_kernel_new_call_notification(struct socket *, + rxrpc_notify_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, unsigned long, - gfp_t); + gfp_t, + rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); -void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); +int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, + void *, size_t, size_t *, bool, u32 *); void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); -bool rxrpc_kernel_is_data_last(struct sk_buff *); -u32 rxrpc_kernel_get_abort_code(struct sk_buff *); -int rxrpc_kernel_get_error_number(struct sk_buff *); -void rxrpc_kernel_free_skb(struct sk_buff *); -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); +struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index e07c91acd904..32d544995dda 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -231,6 +231,8 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @srx: The address of the peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use + * @gfp: The allocation constraints + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -243,7 +245,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct sockaddr_rxrpc *srx, struct key *key, unsigned long user_call_ID, - gfp_t gfp) + gfp_t gfp, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -270,6 +273,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + if (!IS_ERR(call)) + call->notify_rx = notify_rx; release_sock(&rx->sk); _leave(" = %p", call); @@ -289,31 +294,27 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); + rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_call(call); } EXPORT_SYMBOL(rxrpc_kernel_end_call); /** - * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages + * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on - * @interceptor: The function to pass the messages to + * @notify_new_call: Function to be called when new calls appear * - * Allow a kernel service to intercept messages heading for the Rx queue on an - * RxRPC socket. They get passed to the specified function instead. - * @interceptor should free the socket buffers it is given. @interceptor is - * called with the socket receive queue spinlock held and softirqs disabled - - * this ensures that the messages will be delivered in the right order. + * Allow a kernel service to be given notifications about new calls. */ -void rxrpc_kernel_intercept_rx_messages(struct socket *sock, - rxrpc_interceptor_t interceptor) +void rxrpc_kernel_new_call_notification( + struct socket *sock, + rxrpc_notify_new_call_t notify_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - _enter(""); - rx->interceptor = interceptor; + rx->notify_new_call = notify_new_call; } - -EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages); +EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); /* * connect an RxRPC socket diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0c320b2b7b43..4e86d248dc5e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -39,6 +39,20 @@ struct rxrpc_crypt { struct rxrpc_connection; +/* + * Mark applied to socket buffers. + */ +enum rxrpc_skb_mark { + RXRPC_SKB_MARK_DATA, /* data message */ + RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ + RXRPC_SKB_MARK_BUSY, /* server busy message */ + RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ + RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ + RXRPC_SKB_MARK_NET_ERROR, /* network error message */ + RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ + RXRPC_SKB_MARK_NEW_CALL, /* local error message */ +}; + /* * sk_state for RxRPC sockets */ @@ -57,7 +71,7 @@ enum { struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; - rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */ + rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ struct rxrpc_local *local; /* local endpoint */ struct list_head listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ @@ -367,6 +381,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ RXRPC_CALL_IS_SERVICE, /* Call is service call */ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ + RXRPC_CALL_RX_NO_MORE, /* Don't indicate MSG_MORE from recvmsg() */ }; /* @@ -441,6 +456,7 @@ struct rxrpc_call { struct timer_list resend_timer; /* Tx resend timer */ struct work_struct destroyer; /* call destroyer */ struct work_struct processor; /* packet processor and ACK generator */ + rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ @@ -448,6 +464,7 @@ struct rxrpc_call { struct rb_node sock_node; /* node in socket call tree */ struct sk_buff_head rx_queue; /* received packets */ struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ + struct sk_buff_head knlrecv_queue; /* Queue for kernel_recv [TODO: replace this] */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ @@ -512,7 +529,8 @@ extern struct workqueue_struct *rxrpc_workqueue; * call_accept.c */ void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_reject_call(struct rxrpc_sock *); /* @@ -874,6 +892,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *); /* * skbuff.c */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); void rxrpc_new_skb(struct sk_buff *); void rxrpc_see_skb(struct sk_buff *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 03af88fe798b..68a439e30df1 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -286,7 +286,8 @@ security_mismatch: * - assign the user call ID to the call at the front of the queue */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -340,6 +341,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ + call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); @@ -437,17 +439,20 @@ out: * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call * @sock: The socket on which the impending call is waiting * @user_call_ID: The tag to attach to the call + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. + * call is still valid. The caller should immediately trigger their own + * notification as there must be data waiting. */ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID); + call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID, notify_rx); _leave(" = %p", call); return call; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 104ee8b1de06..516d8ea82f02 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -136,6 +136,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_LIST_HEAD(&call->accept_link); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->rx_oos_queue); + skb_queue_head_init(&call->knlrecv_queue); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); @@ -552,8 +553,6 @@ void rxrpc_release_call(struct rxrpc_call *call) spin_lock_bh(&call->lock); } spin_unlock_bh(&call->lock); - - ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE); } del_timer_sync(&call->resend_timer); @@ -682,6 +681,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); rxrpc_purge_queue(&call->rx_queue); + rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_peer(call->peer); kmem_cache_free(rxrpc_call_jar, call); } @@ -737,6 +737,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) rxrpc_purge_queue(&call->rx_queue); ASSERT(skb_queue_empty(&call->rx_oos_queue)); + rxrpc_purge_queue(&call->knlrecv_queue); sock_put(&call->socket->sk); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index bc9b05938ff5..9db90f4f768d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -282,7 +282,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, case RXRPC_PACKET_TYPE_DATA: case RXRPC_PACKET_TYPE_ACK: rxrpc_conn_retransmit_call(conn, skb); - rxrpc_free_skb(skb); return 0; case RXRPC_PACKET_TYPE_ABORT: diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 86bea9ad6c3d..72f016cfaaf5 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -90,9 +90,15 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, } /* allow interception by a kernel service */ - if (rx->interceptor) { - rx->interceptor(sk, call->user_call_ID, skb); + if (skb->mark == RXRPC_SKB_MARK_NEW_CALL && + rx->notify_new_call) { spin_unlock_bh(&sk->sk_receive_queue.lock); + skb_queue_tail(&call->knlrecv_queue, skb); + rx->notify_new_call(&rx->sk); + } else if (call->notify_rx) { + spin_unlock_bh(&sk->sk_receive_queue.lock); + skb_queue_tail(&call->knlrecv_queue, skb); + call->notify_rx(&rx->sk, call, call->user_call_ID); } else { _net("post skb %p", skb); __skb_queue_tail(&sk->sk_receive_queue, skb); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index b1e708a12151..817ae801e769 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -190,7 +190,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (cmd == RXRPC_CMD_ACCEPT) { if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) return -EINVAL; - call = rxrpc_accept_call(rx, user_call_ID); + call = rxrpc_accept_call(rx, user_call_ID, NULL); if (IS_ERR(call)) return PTR_ERR(call); rxrpc_put_call(call); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index c9b38c7fb448..0ab7b334bab1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -369,55 +369,178 @@ wait_error: } -/** - * rxrpc_kernel_is_data_last - Determine if data message is last one - * @skb: Message holding data +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. * - * Determine if data message is last one for the parent call. + * TODO: Note that this is hacked in at the moment and will be replaced. */ -bool rxrpc_kernel_is_data_last(struct sk_buff *skb) +static int temp_deliver_data(struct socket *sock, struct rxrpc_call *call, + struct iov_iter *iter, size_t size, + size_t *_offset) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + size_t remain; + int ret, copy; + + _enter("%d", call->debug_id); + +next: + local_bh_disable(); + skb = skb_dequeue(&call->knlrecv_queue); + local_bh_enable(); + if (!skb) { + if (test_bit(RXRPC_CALL_RX_NO_MORE, &call->flags)) + return 1; + _leave(" = -EAGAIN [empty]"); + return -EAGAIN; + } - ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA); + sp = rxrpc_skb(skb); + _debug("dequeued %p %u/%zu", skb, sp->offset, size); - return sp->hdr.flags & RXRPC_LAST_PACKET; -} + switch (skb->mark) { + case RXRPC_SKB_MARK_DATA: + remain = size - *_offset; + if (remain > 0) { + copy = skb->len - sp->offset; + if (copy > remain) + copy = remain; + ret = skb_copy_datagram_iter(skb, sp->offset, iter, + copy); + if (ret < 0) + goto requeue_and_leave; -EXPORT_SYMBOL(rxrpc_kernel_is_data_last); + /* handle piecemeal consumption of data packets */ + sp->offset += copy; + *_offset += copy; + } -/** - * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message - * @skb: Message indicating an abort - * - * Get the abort code from an RxRPC abort message. - */ -u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + if (sp->offset < skb->len) + goto partially_used_skb; + + /* We consumed the whole packet */ + ASSERTCMP(sp->offset, ==, skb->len); + if (sp->hdr.flags & RXRPC_LAST_PACKET) + set_bit(RXRPC_CALL_RX_NO_MORE, &call->flags); + rxrpc_kernel_data_consumed(call, skb); + rxrpc_free_skb(skb); + goto next; - switch (skb->mark) { - case RXRPC_SKB_MARK_REMOTE_ABORT: - case RXRPC_SKB_MARK_LOCAL_ABORT: - return sp->call->abort_code; default: - BUG(); + rxrpc_free_skb(skb); + goto next; } -} -EXPORT_SYMBOL(rxrpc_kernel_get_abort_code); +partially_used_skb: + ASSERTCMP(*_offset, ==, size); + ret = 0; +requeue_and_leave: + skb_queue_head(&call->knlrecv_queue, skb); + return ret; +} /** - * rxrpc_kernel_get_error - Get the error number from an RxRPC error message - * @skb: Message indicating an error + * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info + * @sock: The socket that the call exists on + * @call: The call to send data through + * @buf: The buffer to receive into + * @size: The size of the buffer, including data already read + * @_offset: The running offset into the buffer. + * @want_more: True if more data is expected to be read + * @_abort: Where the abort code is stored if -ECONNABORTED is returned + * + * Allow a kernel service to receive data and pick up information about the + * state of a call. Returns 0 if got what was asked for and there's more + * available, 1 if we got what was asked for and we're at the end of the data + * and -EAGAIN if we need more data. + * + * Note that we may return -EAGAIN to drain empty packets at the end of the + * data, even if we've already copied over the requested data. * - * Get the error number from an RxRPC error message. + * This function adds the amount it transfers to *_offset, so this should be + * precleared as appropriate. Note that the amount remaining in the buffer is + * taken to be size - *_offset. + * + * *_abort should also be initialised to 0. */ -int rxrpc_kernel_get_error_number(struct sk_buff *skb) +int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, + void *buf, size_t size, size_t *_offset, + bool want_more, u32 *_abort) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct iov_iter iter; + struct kvec iov; + int ret; - return sp->error; -} + _enter("{%d,%s},%zu,%d", + call->debug_id, rxrpc_call_states[call->state], size, want_more); + + ASSERTCMP(*_offset, <=, size); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); -EXPORT_SYMBOL(rxrpc_kernel_get_error_number); + iov.iov_base = buf + *_offset; + iov.iov_len = size - *_offset; + iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); + + lock_sock(sock->sk); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = temp_deliver_data(sock, call, &iter, size, _offset); + if (ret < 0) + goto out; + + /* We can only reach here with a partially full buffer if we + * have reached the end of the data. We must otherwise have a + * full buffer or have been given -EAGAIN. + */ + if (ret == 1) { + if (*_offset < size) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; + goto out; + } + + if (!want_more) + goto excess_data; + goto out; + + case RXRPC_CALL_COMPLETE: + goto call_complete; + + default: + *_offset = 0; + ret = -EINPROGRESS; + goto out; + } + +read_phase_complete: + ret = 1; +out: + release_sock(sock->sk); + _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); + return ret; + +short_data: + ret = -EBADMSG; + goto out; +excess_data: + ret = -EMSGSIZE; + goto out; +call_complete: + *_abort = call->abort_code; + ret = call->error; + if (call->completion == RXRPC_CALL_SUCCEEDED) { + ret = 1; + if (size > 0) + ret = -ECONNRESET; + } + goto out; +} +EXPORT_SYMBOL(rxrpc_kernel_recv_data); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 20529205bb8c..9752f8b1fdd0 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -127,7 +127,6 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) call->rx_data_recv = sp->hdr.seq; rxrpc_hard_ACK_data(call, skb); } -EXPORT_SYMBOL(rxrpc_kernel_data_consumed); /* * Destroy a packet that has an RxRPC control buffer -- cgit v1.2.3 From b9b17debc69d27cd55e21ee51a5ba7fc50a426cf Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 31 Aug 2016 18:22:08 -0500 Subject: net: emac: emac gigabit ethernet controller driver Add support for the Qualcomm Technologies, Inc. EMAC gigabit Ethernet controller. This driver supports the following features: 1) Checksum offload. 2) Interrupt coalescing support. 3) SGMII phy. 4) phylib interface for external phy Based on original work by Niranjana Vishwanathapura Gilad Avidov Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- .../devicetree/bindings/net/qcom-emac.txt | 111 ++ MAINTAINERS | 6 + drivers/net/ethernet/qualcomm/Kconfig | 12 + drivers/net/ethernet/qualcomm/Makefile | 2 + drivers/net/ethernet/qualcomm/emac/Makefile | 7 + drivers/net/ethernet/qualcomm/emac/emac-mac.c | 1528 ++++++++++++++++++++ drivers/net/ethernet/qualcomm/emac/emac-mac.h | 248 ++++ drivers/net/ethernet/qualcomm/emac/emac-phy.c | 204 +++ drivers/net/ethernet/qualcomm/emac/emac-phy.h | 33 + drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 721 +++++++++ drivers/net/ethernet/qualcomm/emac/emac-sgmii.h | 24 + drivers/net/ethernet/qualcomm/emac/emac.c | 743 ++++++++++ drivers/net/ethernet/qualcomm/emac/emac.h | 335 +++++ 13 files changed, 3974 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/qcom-emac.txt create mode 100644 drivers/net/ethernet/qualcomm/emac/Makefile create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-mac.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-mac.h create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-phy.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-phy.h create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-sgmii.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-sgmii.h create mode 100644 drivers/net/ethernet/qualcomm/emac/emac.c create mode 100644 drivers/net/ethernet/qualcomm/emac/emac.h (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt b/Documentation/devicetree/bindings/net/qcom-emac.txt new file mode 100644 index 000000000000..346e6c7f47b7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/qcom-emac.txt @@ -0,0 +1,111 @@ +Qualcomm Technologies EMAC Gigabit Ethernet Controller + +This network controller consists of two devices: a MAC and an SGMII +internal PHY. Each device is represented by a device tree node. A phandle +connects the MAC node to its corresponding internal phy node. Another +phandle points to the external PHY node. + +Required properties: + +MAC node: +- compatible : Should be "qcom,fsm9900-emac". +- reg : Offset and length of the register regions for the device +- interrupts : Interrupt number used by this controller +- mac-address : The 6-byte MAC address. If present, it is the default + MAC address. +- internal-phy : phandle to the internal PHY node +- phy-handle : phandle the the external PHY node + +Internal PHY node: +- compatible : Should be "qcom,fsm9900-emac-sgmii" or "qcom,qdf2432-emac-sgmii". +- reg : Offset and length of the register region(s) for the device +- interrupts : Interrupt number used by this controller + +The external phy child node: +- reg : The phy address + +Example: + +FSM9900: + +soc { + #address-cells = <1>; + #size-cells = <1>; + + emac0: ethernet@feb20000 { + compatible = "qcom,fsm9900-emac"; + reg = <0xfeb20000 0x10000>, + <0xfeb36000 0x1000>; + interrupts = <76>; + + clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>, + <&gcc 6>, <&gcc 7>; + clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk", + "mdio_clk", "tx_clk", "rx_clk", "sys_clk"; + + internal-phy = <&emac_sgmii>; + + phy-handle = <&phy0>; + + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; + + pinctrl-names = "default"; + pinctrl-0 = <&mdio_pins_a>; + }; + + emac_sgmii: ethernet@feb38000 { + compatible = "qcom,fsm9900-emac-sgmii"; + reg = <0xfeb38000 0x1000>; + interrupts = <80>; + }; + + tlmm: pinctrl@fd510000 { + compatible = "qcom,fsm9900-pinctrl"; + + mdio_pins_a: mdio { + state { + pins = "gpio123", "gpio124"; + function = "mdio"; + }; + }; + }; + + +QDF2432: + +soc { + #address-cells = <2>; + #size-cells = <2>; + + emac0: ethernet@38800000 { + compatible = "qcom,fsm9900-emac"; + reg = <0x0 0x38800000 0x0 0x10000>, + <0x0 0x38816000 0x0 0x1000>; + interrupts = <0 256 4>; + + clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>, + <&gcc 6>, <&gcc 7>; + clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk", + "mdio_clk", "tx_clk", "rx_clk", "sys_clk"; + + internal-phy = <&emac_sgmii>; + + phy-handle = <&phy0>; + + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@4 { + reg = <4>; + }; + }; + + emac_sgmii: ethernet@410400 { + compatible = "qcom,qdf2432-emac-sgmii"; + reg = <0x0 0x00410400 0x0 0xc00>, /* Base address */ + <0x0 0x00410000 0x0 0x400>; /* Per-lane digital */ + interrupts = <0 254 1>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 58b50296e0ee..734e03556e2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9696,6 +9696,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git S: Supported F: drivers/net/wireless/ath/ath10k/ +QUALCOMM EMAC GIGABIT ETHERNET DRIVER +M: Timur Tabi +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/qualcomm/emac/ + QUALCOMM HEXAGON ARCHITECTURE M: Richard Kuo L: linux-hexagon@vger.kernel.org diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index a76e380cf89a..9ba568db576f 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -24,4 +24,16 @@ config QCA7000 To compile this driver as a module, choose M here. The module will be called qcaspi. +config QCOM_EMAC + tristate "Qualcomm Technologies, Inc. EMAC Gigabit Ethernet support" + select CRC32 + select PHYLIB + ---help--- + This driver supports the Qualcomm Technologies, Inc. Gigabit + Ethernet Media Access Controller (EMAC). The controller + supports IEEE 802.3-2002, half-duplex mode at 10/100 Mb/s, + full-duplex mode at 10/100/1000Mb/s, Wake On LAN (WOL) for + low power, Receive-Side Scaling (RSS), and IEEE 1588-2008 + Precision Clock Synchronization Protocol. + endif # NET_VENDOR_QUALCOMM diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile index 9da2d75db700..aacb0a585c68 100644 --- a/drivers/net/ethernet/qualcomm/Makefile +++ b/drivers/net/ethernet/qualcomm/Makefile @@ -4,3 +4,5 @@ obj-$(CONFIG_QCA7000) += qcaspi.o qcaspi-objs := qca_spi.o qca_framing.o qca_7k.o qca_debug.o + +obj-y += emac/ diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile new file mode 100644 index 000000000000..01ee144c6386 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Qualcomm Technologies, Inc. EMAC Gigabit Ethernet driver +# + +obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o + +qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c new file mode 100644 index 000000000000..e97968ed4b8f --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -0,0 +1,1528 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC Ethernet Controller MAC layer support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-sgmii.h" + +/* EMAC base register offsets */ +#define EMAC_MAC_CTRL 0x001480 +#define EMAC_WOL_CTRL0 0x0014a0 +#define EMAC_RSS_KEY0 0x0014b0 +#define EMAC_H1TPD_BASE_ADDR_LO 0x0014e0 +#define EMAC_H2TPD_BASE_ADDR_LO 0x0014e4 +#define EMAC_H3TPD_BASE_ADDR_LO 0x0014e8 +#define EMAC_INTER_SRAM_PART9 0x001534 +#define EMAC_DESC_CTRL_0 0x001540 +#define EMAC_DESC_CTRL_1 0x001544 +#define EMAC_DESC_CTRL_2 0x001550 +#define EMAC_DESC_CTRL_10 0x001554 +#define EMAC_DESC_CTRL_12 0x001558 +#define EMAC_DESC_CTRL_13 0x00155c +#define EMAC_DESC_CTRL_3 0x001560 +#define EMAC_DESC_CTRL_4 0x001564 +#define EMAC_DESC_CTRL_5 0x001568 +#define EMAC_DESC_CTRL_14 0x00156c +#define EMAC_DESC_CTRL_15 0x001570 +#define EMAC_DESC_CTRL_16 0x001574 +#define EMAC_DESC_CTRL_6 0x001578 +#define EMAC_DESC_CTRL_8 0x001580 +#define EMAC_DESC_CTRL_9 0x001584 +#define EMAC_DESC_CTRL_11 0x001588 +#define EMAC_TXQ_CTRL_0 0x001590 +#define EMAC_TXQ_CTRL_1 0x001594 +#define EMAC_TXQ_CTRL_2 0x001598 +#define EMAC_RXQ_CTRL_0 0x0015a0 +#define EMAC_RXQ_CTRL_1 0x0015a4 +#define EMAC_RXQ_CTRL_2 0x0015a8 +#define EMAC_RXQ_CTRL_3 0x0015ac +#define EMAC_BASE_CPU_NUMBER 0x0015b8 +#define EMAC_DMA_CTRL 0x0015c0 +#define EMAC_MAILBOX_0 0x0015e0 +#define EMAC_MAILBOX_5 0x0015e4 +#define EMAC_MAILBOX_6 0x0015e8 +#define EMAC_MAILBOX_13 0x0015ec +#define EMAC_MAILBOX_2 0x0015f4 +#define EMAC_MAILBOX_3 0x0015f8 +#define EMAC_MAILBOX_11 0x00160c +#define EMAC_AXI_MAST_CTRL 0x001610 +#define EMAC_MAILBOX_12 0x001614 +#define EMAC_MAILBOX_9 0x001618 +#define EMAC_MAILBOX_10 0x00161c +#define EMAC_ATHR_HEADER_CTRL 0x001620 +#define EMAC_CLK_GATE_CTRL 0x001814 +#define EMAC_MISC_CTRL 0x001990 +#define EMAC_MAILBOX_7 0x0019e0 +#define EMAC_MAILBOX_8 0x0019e4 +#define EMAC_MAILBOX_15 0x001bd4 +#define EMAC_MAILBOX_16 0x001bd8 + +/* EMAC_MAC_CTRL */ +#define SINGLE_PAUSE_MODE 0x10000000 +#define DEBUG_MODE 0x08000000 +#define BROAD_EN 0x04000000 +#define MULTI_ALL 0x02000000 +#define RX_CHKSUM_EN 0x01000000 +#define HUGE 0x00800000 +#define SPEED(x) (((x) & 0x3) << 20) +#define SPEED_MASK SPEED(0x3) +#define SIMR 0x00080000 +#define TPAUSE 0x00010000 +#define PROM_MODE 0x00008000 +#define VLAN_STRIP 0x00004000 +#define PRLEN_BMSK 0x00003c00 +#define PRLEN_SHFT 10 +#define HUGEN 0x00000200 +#define FLCHK 0x00000100 +#define PCRCE 0x00000080 +#define CRCE 0x00000040 +#define FULLD 0x00000020 +#define MAC_LP_EN 0x00000010 +#define RXFC 0x00000008 +#define TXFC 0x00000004 +#define RXEN 0x00000002 +#define TXEN 0x00000001 + + +/* EMAC_WOL_CTRL0 */ +#define LK_CHG_PME 0x20 +#define LK_CHG_EN 0x10 +#define MG_FRAME_PME 0x8 +#define MG_FRAME_EN 0x4 +#define WK_FRAME_EN 0x1 + +/* EMAC_DESC_CTRL_3 */ +#define RFD_RING_SIZE_BMSK 0xfff + +/* EMAC_DESC_CTRL_4 */ +#define RX_BUFFER_SIZE_BMSK 0xffff + +/* EMAC_DESC_CTRL_6 */ +#define RRD_RING_SIZE_BMSK 0xfff + +/* EMAC_DESC_CTRL_9 */ +#define TPD_RING_SIZE_BMSK 0xffff + +/* EMAC_TXQ_CTRL_0 */ +#define NUM_TXF_BURST_PREF_BMSK 0xffff0000 +#define NUM_TXF_BURST_PREF_SHFT 16 +#define LS_8023_SP 0x80 +#define TXQ_MODE 0x40 +#define TXQ_EN 0x20 +#define IP_OP_SP 0x10 +#define NUM_TPD_BURST_PREF_BMSK 0xf +#define NUM_TPD_BURST_PREF_SHFT 0 + +/* EMAC_TXQ_CTRL_1 */ +#define JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK 0x7ff + +/* EMAC_TXQ_CTRL_2 */ +#define TXF_HWM_BMSK 0xfff0000 +#define TXF_LWM_BMSK 0xfff + +/* EMAC_RXQ_CTRL_0 */ +#define RXQ_EN BIT(31) +#define CUT_THRU_EN BIT(30) +#define RSS_HASH_EN BIT(29) +#define NUM_RFD_BURST_PREF_BMSK 0x3f00000 +#define NUM_RFD_BURST_PREF_SHFT 20 +#define IDT_TABLE_SIZE_BMSK 0x1ff00 +#define IDT_TABLE_SIZE_SHFT 8 +#define SP_IPV6 0x80 + +/* EMAC_RXQ_CTRL_1 */ +#define JUMBO_1KAH_BMSK 0xf000 +#define JUMBO_1KAH_SHFT 12 +#define RFD_PREF_LOW_TH 0x10 +#define RFD_PREF_LOW_THRESHOLD_BMSK 0xfc0 +#define RFD_PREF_LOW_THRESHOLD_SHFT 6 +#define RFD_PREF_UP_TH 0x10 +#define RFD_PREF_UP_THRESHOLD_BMSK 0x3f +#define RFD_PREF_UP_THRESHOLD_SHFT 0 + +/* EMAC_RXQ_CTRL_2 */ +#define RXF_DOF_THRESFHOLD 0x1a0 +#define RXF_DOF_THRESHOLD_BMSK 0xfff0000 +#define RXF_DOF_THRESHOLD_SHFT 16 +#define RXF_UOF_THRESFHOLD 0xbe +#define RXF_UOF_THRESHOLD_BMSK 0xfff +#define RXF_UOF_THRESHOLD_SHFT 0 + +/* EMAC_RXQ_CTRL_3 */ +#define RXD_TIMER_BMSK 0xffff0000 +#define RXD_THRESHOLD_BMSK 0xfff +#define RXD_THRESHOLD_SHFT 0 + +/* EMAC_DMA_CTRL */ +#define DMAW_DLY_CNT_BMSK 0xf0000 +#define DMAW_DLY_CNT_SHFT 16 +#define DMAR_DLY_CNT_BMSK 0xf800 +#define DMAR_DLY_CNT_SHFT 11 +#define DMAR_REQ_PRI 0x400 +#define REGWRBLEN_BMSK 0x380 +#define REGWRBLEN_SHFT 7 +#define REGRDBLEN_BMSK 0x70 +#define REGRDBLEN_SHFT 4 +#define OUT_ORDER_MODE 0x4 +#define ENH_ORDER_MODE 0x2 +#define IN_ORDER_MODE 0x1 + +/* EMAC_MAILBOX_13 */ +#define RFD3_PROC_IDX_BMSK 0xfff0000 +#define RFD3_PROC_IDX_SHFT 16 +#define RFD3_PROD_IDX_BMSK 0xfff +#define RFD3_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_2 */ +#define NTPD_CONS_IDX_BMSK 0xffff0000 +#define NTPD_CONS_IDX_SHFT 16 + +/* EMAC_MAILBOX_3 */ +#define RFD0_CONS_IDX_BMSK 0xfff +#define RFD0_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_11 */ +#define H3TPD_PROD_IDX_BMSK 0xffff0000 +#define H3TPD_PROD_IDX_SHFT 16 + +/* EMAC_AXI_MAST_CTRL */ +#define DATA_BYTE_SWAP 0x8 +#define MAX_BOUND 0x2 +#define MAX_BTYPE 0x1 + +/* EMAC_MAILBOX_12 */ +#define H3TPD_CONS_IDX_BMSK 0xffff0000 +#define H3TPD_CONS_IDX_SHFT 16 + +/* EMAC_MAILBOX_9 */ +#define H2TPD_PROD_IDX_BMSK 0xffff +#define H2TPD_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_10 */ +#define H1TPD_CONS_IDX_BMSK 0xffff0000 +#define H1TPD_CONS_IDX_SHFT 16 +#define H2TPD_CONS_IDX_BMSK 0xffff +#define H2TPD_CONS_IDX_SHFT 0 + +/* EMAC_ATHR_HEADER_CTRL */ +#define HEADER_CNT_EN 0x2 +#define HEADER_ENABLE 0x1 + +/* EMAC_MAILBOX_0 */ +#define RFD0_PROC_IDX_BMSK 0xfff0000 +#define RFD0_PROC_IDX_SHFT 16 +#define RFD0_PROD_IDX_BMSK 0xfff +#define RFD0_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_5 */ +#define RFD1_PROC_IDX_BMSK 0xfff0000 +#define RFD1_PROC_IDX_SHFT 16 +#define RFD1_PROD_IDX_BMSK 0xfff +#define RFD1_PROD_IDX_SHFT 0 + +/* EMAC_MISC_CTRL */ +#define RX_UNCPL_INT_EN 0x1 + +/* EMAC_MAILBOX_7 */ +#define RFD2_CONS_IDX_BMSK 0xfff0000 +#define RFD2_CONS_IDX_SHFT 16 +#define RFD1_CONS_IDX_BMSK 0xfff +#define RFD1_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_8 */ +#define RFD3_CONS_IDX_BMSK 0xfff +#define RFD3_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_15 */ +#define NTPD_PROD_IDX_BMSK 0xffff +#define NTPD_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_16 */ +#define H1TPD_PROD_IDX_BMSK 0xffff +#define H1TPD_PROD_IDX_SHFT 0 + +#define RXQ0_RSS_HSTYP_IPV6_TCP_EN 0x20 +#define RXQ0_RSS_HSTYP_IPV6_EN 0x10 +#define RXQ0_RSS_HSTYP_IPV4_TCP_EN 0x8 +#define RXQ0_RSS_HSTYP_IPV4_EN 0x4 + +/* EMAC_EMAC_WRAPPER_TX_TS_INX */ +#define EMAC_WRAPPER_TX_TS_EMPTY BIT(31) +#define EMAC_WRAPPER_TX_TS_INX_BMSK 0xffff + +struct emac_skb_cb { + u32 tpd_idx; + unsigned long jiffies; +}; + +#define EMAC_SKB_CB(skb) ((struct emac_skb_cb *)(skb)->cb) +#define EMAC_RSS_IDT_SIZE 256 +#define JUMBO_1KAH 0x4 +#define RXD_TH 0x100 +#define EMAC_TPD_LAST_FRAGMENT 0x80000000 +#define EMAC_TPD_TSTAMP_SAVE 0x80000000 + +/* EMAC Errors in emac_rrd.word[3] */ +#define EMAC_RRD_L4F BIT(14) +#define EMAC_RRD_IPF BIT(15) +#define EMAC_RRD_CRC BIT(21) +#define EMAC_RRD_FAE BIT(22) +#define EMAC_RRD_TRN BIT(23) +#define EMAC_RRD_RNT BIT(24) +#define EMAC_RRD_INC BIT(25) +#define EMAC_RRD_FOV BIT(29) +#define EMAC_RRD_LEN BIT(30) + +/* Error bits that will result in a received frame being discarded */ +#define EMAC_RRD_ERROR (EMAC_RRD_IPF | EMAC_RRD_CRC | EMAC_RRD_FAE | \ + EMAC_RRD_TRN | EMAC_RRD_RNT | EMAC_RRD_INC | \ + EMAC_RRD_FOV | EMAC_RRD_LEN) +#define EMAC_RRD_STATS_DW_IDX 3 + +#define EMAC_RRD(RXQ, SIZE, IDX) ((RXQ)->rrd.v_addr + (SIZE * (IDX))) +#define EMAC_RFD(RXQ, SIZE, IDX) ((RXQ)->rfd.v_addr + (SIZE * (IDX))) +#define EMAC_TPD(TXQ, SIZE, IDX) ((TXQ)->tpd.v_addr + (SIZE * (IDX))) + +#define GET_RFD_BUFFER(RXQ, IDX) (&((RXQ)->rfd.rfbuff[(IDX)])) +#define GET_TPD_BUFFER(RTQ, IDX) (&((RTQ)->tpd.tpbuff[(IDX)])) + +#define EMAC_TX_POLL_HWTXTSTAMP_THRESHOLD 8 + +#define ISR_RX_PKT (\ + RX_PKT_INT0 |\ + RX_PKT_INT1 |\ + RX_PKT_INT2 |\ + RX_PKT_INT3) + +#define EMAC_MAC_IRQ_RES "core0" + +void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr) +{ + u32 crc32, bit, reg, mta; + + /* Calculate the CRC of the MAC address */ + crc32 = ether_crc(ETH_ALEN, addr); + + /* The HASH Table is an array of 2 32-bit registers. It is + * treated like an array of 64 bits (BitArray[hash_value]). + * Use the upper 6 bits of the above CRC as the hash value. + */ + reg = (crc32 >> 31) & 0x1; + bit = (crc32 >> 26) & 0x1F; + + mta = readl(adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); + mta |= BIT(bit); + writel(mta, adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); +} + +void emac_mac_multicast_addr_clear(struct emac_adapter *adpt) +{ + writel(0, adpt->base + EMAC_HASH_TAB_REG0); + writel(0, adpt->base + EMAC_HASH_TAB_REG1); +} + +/* definitions for RSS */ +#define EMAC_RSS_KEY(_i, _type) \ + (EMAC_RSS_KEY0 + ((_i) * sizeof(_type))) +#define EMAC_RSS_TBL(_i, _type) \ + (EMAC_IDT_TABLE0 + ((_i) * sizeof(_type))) + +/* Config MAC modes */ +void emac_mac_mode_config(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + u32 mac; + + mac = readl(adpt->base + EMAC_MAC_CTRL); + mac &= ~(VLAN_STRIP | PROM_MODE | MULTI_ALL | MAC_LP_EN); + + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + mac |= VLAN_STRIP; + + if (netdev->flags & IFF_PROMISC) + mac |= PROM_MODE; + + if (netdev->flags & IFF_ALLMULTI) + mac |= MULTI_ALL; + + writel(mac, adpt->base + EMAC_MAC_CTRL); +} + +/* Config descriptor rings */ +static void emac_mac_dma_rings_config(struct emac_adapter *adpt) +{ + static const unsigned short tpd_q_offset[] = { + EMAC_DESC_CTRL_8, EMAC_H1TPD_BASE_ADDR_LO, + EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO}; + static const unsigned short rfd_q_offset[] = { + EMAC_DESC_CTRL_2, EMAC_DESC_CTRL_10, + EMAC_DESC_CTRL_12, EMAC_DESC_CTRL_13}; + static const unsigned short rrd_q_offset[] = { + EMAC_DESC_CTRL_5, EMAC_DESC_CTRL_14, + EMAC_DESC_CTRL_15, EMAC_DESC_CTRL_16}; + + /* TPD (Transmit Packet Descriptor) */ + writel(upper_32_bits(adpt->tx_q.tpd.dma_addr), + adpt->base + EMAC_DESC_CTRL_1); + + writel(lower_32_bits(adpt->tx_q.tpd.dma_addr), + adpt->base + tpd_q_offset[0]); + + writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_9); + + /* RFD (Receive Free Descriptor) & RRD (Receive Return Descriptor) */ + writel(upper_32_bits(adpt->rx_q.rfd.dma_addr), + adpt->base + EMAC_DESC_CTRL_0); + + writel(lower_32_bits(adpt->rx_q.rfd.dma_addr), + adpt->base + rfd_q_offset[0]); + writel(lower_32_bits(adpt->rx_q.rrd.dma_addr), + adpt->base + rrd_q_offset[0]); + + writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_3); + writel(adpt->rx_q.rrd.count & RRD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_6); + + writel(adpt->rxbuf_size & RX_BUFFER_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_4); + + writel(0, adpt->base + EMAC_DESC_CTRL_11); + + /* Load all of the base addresses above and ensure that triggering HW to + * read ring pointers is flushed + */ + writel(1, adpt->base + EMAC_INTER_SRAM_PART9); +} + +/* Config transmit parameters */ +static void emac_mac_tx_config(struct emac_adapter *adpt) +{ + u32 val; + + writel((EMAC_MAX_TX_OFFLOAD_THRESH >> 3) & + JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK, adpt->base + EMAC_TXQ_CTRL_1); + + val = (adpt->tpd_burst << NUM_TPD_BURST_PREF_SHFT) & + NUM_TPD_BURST_PREF_BMSK; + + val |= TXQ_MODE | LS_8023_SP; + val |= (0x0100 << NUM_TXF_BURST_PREF_SHFT) & + NUM_TXF_BURST_PREF_BMSK; + + writel(val, adpt->base + EMAC_TXQ_CTRL_0); + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_2, + (TXF_HWM_BMSK | TXF_LWM_BMSK), 0); +} + +/* Config receive parameters */ +static void emac_mac_rx_config(struct emac_adapter *adpt) +{ + u32 val; + + val = (adpt->rfd_burst << NUM_RFD_BURST_PREF_SHFT) & + NUM_RFD_BURST_PREF_BMSK; + val |= (SP_IPV6 | CUT_THRU_EN); + + writel(val, adpt->base + EMAC_RXQ_CTRL_0); + + val = readl(adpt->base + EMAC_RXQ_CTRL_1); + val &= ~(JUMBO_1KAH_BMSK | RFD_PREF_LOW_THRESHOLD_BMSK | + RFD_PREF_UP_THRESHOLD_BMSK); + val |= (JUMBO_1KAH << JUMBO_1KAH_SHFT) | + (RFD_PREF_LOW_TH << RFD_PREF_LOW_THRESHOLD_SHFT) | + (RFD_PREF_UP_TH << RFD_PREF_UP_THRESHOLD_SHFT); + writel(val, adpt->base + EMAC_RXQ_CTRL_1); + + val = readl(adpt->base + EMAC_RXQ_CTRL_2); + val &= ~(RXF_DOF_THRESHOLD_BMSK | RXF_UOF_THRESHOLD_BMSK); + val |= (RXF_DOF_THRESFHOLD << RXF_DOF_THRESHOLD_SHFT) | + (RXF_UOF_THRESFHOLD << RXF_UOF_THRESHOLD_SHFT); + writel(val, adpt->base + EMAC_RXQ_CTRL_2); + + val = readl(adpt->base + EMAC_RXQ_CTRL_3); + val &= ~(RXD_TIMER_BMSK | RXD_THRESHOLD_BMSK); + val |= RXD_TH << RXD_THRESHOLD_SHFT; + writel(val, adpt->base + EMAC_RXQ_CTRL_3); +} + +/* Config dma */ +static void emac_mac_dma_config(struct emac_adapter *adpt) +{ + u32 dma_ctrl = DMAR_REQ_PRI; + + switch (adpt->dma_order) { + case emac_dma_ord_in: + dma_ctrl |= IN_ORDER_MODE; + break; + case emac_dma_ord_enh: + dma_ctrl |= ENH_ORDER_MODE; + break; + case emac_dma_ord_out: + dma_ctrl |= OUT_ORDER_MODE; + break; + default: + break; + } + + dma_ctrl |= (((u32)adpt->dmar_block) << REGRDBLEN_SHFT) & + REGRDBLEN_BMSK; + dma_ctrl |= (((u32)adpt->dmaw_block) << REGWRBLEN_SHFT) & + REGWRBLEN_BMSK; + dma_ctrl |= (((u32)adpt->dmar_dly_cnt) << DMAR_DLY_CNT_SHFT) & + DMAR_DLY_CNT_BMSK; + dma_ctrl |= (((u32)adpt->dmaw_dly_cnt) << DMAW_DLY_CNT_SHFT) & + DMAW_DLY_CNT_BMSK; + + /* config DMA and ensure that configuration is flushed to HW */ + writel(dma_ctrl, adpt->base + EMAC_DMA_CTRL); +} + +/* set MAC address */ +static void emac_set_mac_address(struct emac_adapter *adpt, u8 *addr) +{ + u32 sta; + + /* for example: 00-A0-C6-11-22-33 + * 0<-->C6112233, 1<-->00A0. + */ + + /* low 32bit word */ + sta = (((u32)addr[2]) << 24) | (((u32)addr[3]) << 16) | + (((u32)addr[4]) << 8) | (((u32)addr[5])); + writel(sta, adpt->base + EMAC_MAC_STA_ADDR0); + + /* hight 32bit word */ + sta = (((u32)addr[0]) << 8) | (u32)addr[1]; + writel(sta, adpt->base + EMAC_MAC_STA_ADDR1); +} + +static void emac_mac_config(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + unsigned int max_frame; + u32 val; + + emac_set_mac_address(adpt, netdev->dev_addr); + + max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + adpt->rxbuf_size = netdev->mtu > EMAC_DEF_RX_BUF_SIZE ? + ALIGN(max_frame, 8) : EMAC_DEF_RX_BUF_SIZE; + + emac_mac_dma_rings_config(adpt); + + writel(netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, + adpt->base + EMAC_MAX_FRAM_LEN_CTRL); + + emac_mac_tx_config(adpt); + emac_mac_rx_config(adpt); + emac_mac_dma_config(adpt); + + val = readl(adpt->base + EMAC_AXI_MAST_CTRL); + val &= ~(DATA_BYTE_SWAP | MAX_BOUND); + val |= MAX_BTYPE; + writel(val, adpt->base + EMAC_AXI_MAST_CTRL); + writel(0, adpt->base + EMAC_CLK_GATE_CTRL); + writel(RX_UNCPL_INT_EN, adpt->base + EMAC_MISC_CTRL); +} + +void emac_mac_reset(struct emac_adapter *adpt) +{ + emac_mac_stop(adpt); + + emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, SOFT_RST); + usleep_range(100, 150); /* reset may take up to 100usec */ + + /* interrupt clear-on-read */ + emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, INT_RD_CLR_EN); +} + +void emac_mac_start(struct emac_adapter *adpt) +{ + struct phy_device *phydev = adpt->phydev; + u32 mac, csr1; + + /* enable tx queue */ + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, 0, TXQ_EN); + + /* enable rx queue */ + emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, 0, RXQ_EN); + + /* enable mac control */ + mac = readl(adpt->base + EMAC_MAC_CTRL); + csr1 = readl(adpt->csr + EMAC_EMAC_WRAPPER_CSR1); + + mac |= TXEN | RXEN; /* enable RX/TX */ + + /* We don't have ethtool support yet, so force flow-control mode + * to 'full' always. + */ + mac |= TXFC | RXFC; + + /* setup link speed */ + mac &= ~SPEED_MASK; + if (phydev->speed == SPEED_1000) { + mac |= SPEED(2); + csr1 |= FREQ_MODE; + } else { + mac |= SPEED(1); + csr1 &= ~FREQ_MODE; + } + + if (phydev->duplex == DUPLEX_FULL) + mac |= FULLD; + else + mac &= ~FULLD; + + /* other parameters */ + mac |= (CRCE | PCRCE); + mac |= ((adpt->preamble << PRLEN_SHFT) & PRLEN_BMSK); + mac |= BROAD_EN; + mac |= FLCHK; + mac &= ~RX_CHKSUM_EN; + mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL | + DEBUG_MODE | SINGLE_PAUSE_MODE); + + writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1); + + writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL); + + /* enable interrupt read clear, low power sleep mode and + * the irq moderators + */ + + writel_relaxed(adpt->irq_mod, adpt->base + EMAC_IRQ_MOD_TIM_INIT); + writel_relaxed(INT_RD_CLR_EN | LPW_MODE | IRQ_MODERATOR_EN | + IRQ_MODERATOR2_EN, adpt->base + EMAC_DMA_MAS_CTRL); + + emac_mac_mode_config(adpt); + + emac_reg_update32(adpt->base + EMAC_ATHR_HEADER_CTRL, + (HEADER_ENABLE | HEADER_CNT_EN), 0); + + emac_reg_update32(adpt->csr + EMAC_EMAC_WRAPPER_CSR2, 0, WOL_EN); +} + +void emac_mac_stop(struct emac_adapter *adpt) +{ + emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, RXQ_EN, 0); + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, TXQ_EN, 0); + emac_reg_update32(adpt->base + EMAC_MAC_CTRL, TXEN | RXEN, 0); + usleep_range(1000, 1050); /* stopping mac may take upto 1msec */ +} + +/* Free all descriptors of given transmit queue */ +static void emac_tx_q_descs_free(struct emac_adapter *adpt) +{ + struct emac_tx_queue *tx_q = &adpt->tx_q; + unsigned int i; + size_t size; + + /* ring already cleared, nothing to do */ + if (!tx_q->tpd.tpbuff) + return; + + for (i = 0; i < tx_q->tpd.count; i++) { + struct emac_buffer *tpbuf = GET_TPD_BUFFER(tx_q, i); + + if (tpbuf->dma_addr) { + dma_unmap_single(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + } + if (tpbuf->skb) { + dev_kfree_skb_any(tpbuf->skb); + tpbuf->skb = NULL; + } + } + + size = sizeof(struct emac_buffer) * tx_q->tpd.count; + memset(tx_q->tpd.tpbuff, 0, size); + + /* clear the descriptor ring */ + memset(tx_q->tpd.v_addr, 0, tx_q->tpd.size); + + tx_q->tpd.consume_idx = 0; + tx_q->tpd.produce_idx = 0; +} + +/* Free all descriptors of given receive queue */ +static void emac_rx_q_free_descs(struct emac_adapter *adpt) +{ + struct device *dev = adpt->netdev->dev.parent; + struct emac_rx_queue *rx_q = &adpt->rx_q; + unsigned int i; + size_t size; + + /* ring already cleared, nothing to do */ + if (!rx_q->rfd.rfbuff) + return; + + for (i = 0; i < rx_q->rfd.count; i++) { + struct emac_buffer *rfbuf = GET_RFD_BUFFER(rx_q, i); + + if (rfbuf->dma_addr) { + dma_unmap_single(dev, rfbuf->dma_addr, rfbuf->length, + DMA_FROM_DEVICE); + rfbuf->dma_addr = 0; + } + if (rfbuf->skb) { + dev_kfree_skb(rfbuf->skb); + rfbuf->skb = NULL; + } + } + + size = sizeof(struct emac_buffer) * rx_q->rfd.count; + memset(rx_q->rfd.rfbuff, 0, size); + + /* clear the descriptor rings */ + memset(rx_q->rrd.v_addr, 0, rx_q->rrd.size); + rx_q->rrd.produce_idx = 0; + rx_q->rrd.consume_idx = 0; + + memset(rx_q->rfd.v_addr, 0, rx_q->rfd.size); + rx_q->rfd.produce_idx = 0; + rx_q->rfd.consume_idx = 0; +} + +/* Free all buffers associated with given transmit queue */ +static void emac_tx_q_bufs_free(struct emac_adapter *adpt) +{ + struct emac_tx_queue *tx_q = &adpt->tx_q; + + emac_tx_q_descs_free(adpt); + + kfree(tx_q->tpd.tpbuff); + tx_q->tpd.tpbuff = NULL; + tx_q->tpd.v_addr = NULL; + tx_q->tpd.dma_addr = 0; + tx_q->tpd.size = 0; +} + +/* Allocate TX descriptor ring for the given transmit queue */ +static int emac_tx_q_desc_alloc(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + size_t size; + + size = sizeof(struct emac_buffer) * tx_q->tpd.count; + tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL); + if (!tx_q->tpd.tpbuff) + return -ENOMEM; + + tx_q->tpd.size = tx_q->tpd.count * (adpt->tpd_size * 4); + tx_q->tpd.dma_addr = ring_header->dma_addr + ring_header->used; + tx_q->tpd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(tx_q->tpd.size, 8); + tx_q->tpd.produce_idx = 0; + tx_q->tpd.consume_idx = 0; + + return 0; +} + +/* Free all buffers associated with given transmit queue */ +static void emac_rx_q_bufs_free(struct emac_adapter *adpt) +{ + struct emac_rx_queue *rx_q = &adpt->rx_q; + + emac_rx_q_free_descs(adpt); + + kfree(rx_q->rfd.rfbuff); + rx_q->rfd.rfbuff = NULL; + + rx_q->rfd.v_addr = NULL; + rx_q->rfd.dma_addr = 0; + rx_q->rfd.size = 0; + + rx_q->rrd.v_addr = NULL; + rx_q->rrd.dma_addr = 0; + rx_q->rrd.size = 0; +} + +/* Allocate RX descriptor rings for the given receive queue */ +static int emac_rx_descs_alloc(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct emac_rx_queue *rx_q = &adpt->rx_q; + size_t size; + + size = sizeof(struct emac_buffer) * rx_q->rfd.count; + rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL); + if (!rx_q->rfd.rfbuff) + return -ENOMEM; + + rx_q->rrd.size = rx_q->rrd.count * (adpt->rrd_size * 4); + rx_q->rfd.size = rx_q->rfd.count * (adpt->rfd_size * 4); + + rx_q->rrd.dma_addr = ring_header->dma_addr + ring_header->used; + rx_q->rrd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(rx_q->rrd.size, 8); + + rx_q->rfd.dma_addr = ring_header->dma_addr + ring_header->used; + rx_q->rfd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(rx_q->rfd.size, 8); + + rx_q->rrd.produce_idx = 0; + rx_q->rrd.consume_idx = 0; + + rx_q->rfd.produce_idx = 0; + rx_q->rfd.consume_idx = 0; + + return 0; +} + +/* Allocate all TX and RX descriptor rings */ +int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct device *dev = adpt->netdev->dev.parent; + unsigned int num_tx_descs = adpt->tx_desc_cnt; + unsigned int num_rx_descs = adpt->rx_desc_cnt; + int ret; + + adpt->tx_q.tpd.count = adpt->tx_desc_cnt; + + adpt->rx_q.rrd.count = adpt->rx_desc_cnt; + adpt->rx_q.rfd.count = adpt->rx_desc_cnt; + + /* Ring DMA buffer. Each ring may need up to 8 bytes for alignment, + * hence the additional padding bytes are allocated. + */ + ring_header->size = num_tx_descs * (adpt->tpd_size * 4) + + num_rx_descs * (adpt->rfd_size * 4) + + num_rx_descs * (adpt->rrd_size * 4) + + 8 + 2 * 8; /* 8 byte per one Tx and two Rx rings */ + + ring_header->used = 0; + ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size, + &ring_header->dma_addr, + GFP_KERNEL); + if (!ring_header->v_addr) + return -ENOMEM; + + ring_header->used = ALIGN(ring_header->dma_addr, 8) - + ring_header->dma_addr; + + ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q); + if (ret) { + netdev_err(adpt->netdev, "error: Tx Queue alloc failed\n"); + goto err_alloc_tx; + } + + ret = emac_rx_descs_alloc(adpt); + if (ret) { + netdev_err(adpt->netdev, "error: Rx Queue alloc failed\n"); + goto err_alloc_rx; + } + + return 0; + +err_alloc_rx: + emac_tx_q_bufs_free(adpt); +err_alloc_tx: + dma_free_coherent(dev, ring_header->size, + ring_header->v_addr, ring_header->dma_addr); + + ring_header->v_addr = NULL; + ring_header->dma_addr = 0; + ring_header->size = 0; + ring_header->used = 0; + + return ret; +} + +/* Free all TX and RX descriptor rings */ +void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct device *dev = adpt->netdev->dev.parent; + + emac_tx_q_bufs_free(adpt); + emac_rx_q_bufs_free(adpt); + + dma_free_coherent(dev, ring_header->size, + ring_header->v_addr, ring_header->dma_addr); + + ring_header->v_addr = NULL; + ring_header->dma_addr = 0; + ring_header->size = 0; + ring_header->used = 0; +} + +/* Initialize descriptor rings */ +static void emac_mac_rx_tx_ring_reset_all(struct emac_adapter *adpt) +{ + unsigned int i; + + adpt->tx_q.tpd.produce_idx = 0; + adpt->tx_q.tpd.consume_idx = 0; + for (i = 0; i < adpt->tx_q.tpd.count; i++) + adpt->tx_q.tpd.tpbuff[i].dma_addr = 0; + + adpt->rx_q.rrd.produce_idx = 0; + adpt->rx_q.rrd.consume_idx = 0; + adpt->rx_q.rfd.produce_idx = 0; + adpt->rx_q.rfd.consume_idx = 0; + for (i = 0; i < adpt->rx_q.rfd.count; i++) + adpt->rx_q.rfd.rfbuff[i].dma_addr = 0; +} + +/* Produce new receive free descriptor */ +static void emac_mac_rx_rfd_create(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q, + dma_addr_t addr) +{ + u32 *hw_rfd = EMAC_RFD(rx_q, adpt->rfd_size, rx_q->rfd.produce_idx); + + *(hw_rfd++) = lower_32_bits(addr); + *hw_rfd = upper_32_bits(addr); + + if (++rx_q->rfd.produce_idx == rx_q->rfd.count) + rx_q->rfd.produce_idx = 0; +} + +/* Fill up receive queue's RFD with preallocated receive buffers */ +static void emac_mac_rx_descs_refill(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q) +{ + struct emac_buffer *curr_rxbuf; + struct emac_buffer *next_rxbuf; + unsigned int count = 0; + u32 next_produce_idx; + + next_produce_idx = rx_q->rfd.produce_idx + 1; + if (next_produce_idx == rx_q->rfd.count) + next_produce_idx = 0; + + curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx); + next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx); + + /* this always has a blank rx_buffer*/ + while (!next_rxbuf->dma_addr) { + struct sk_buff *skb; + int ret; + + skb = netdev_alloc_skb_ip_align(adpt->netdev, adpt->rxbuf_size); + if (!skb) + break; + + curr_rxbuf->dma_addr = + dma_map_single(adpt->netdev->dev.parent, skb->data, + curr_rxbuf->length, DMA_FROM_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + curr_rxbuf->dma_addr); + if (ret) { + dev_kfree_skb(skb); + break; + } + curr_rxbuf->skb = skb; + curr_rxbuf->length = adpt->rxbuf_size; + + emac_mac_rx_rfd_create(adpt, rx_q, curr_rxbuf->dma_addr); + next_produce_idx = rx_q->rfd.produce_idx + 1; + if (next_produce_idx == rx_q->rfd.count) + next_produce_idx = 0; + + curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx); + next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx); + count++; + } + + if (count) { + u32 prod_idx = (rx_q->rfd.produce_idx << rx_q->produce_shift) & + rx_q->produce_mask; + emac_reg_update32(adpt->base + rx_q->produce_reg, + rx_q->produce_mask, prod_idx); + } +} + +static void emac_adjust_link(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; + + if (phydev->link) + emac_mac_start(adpt); + else + emac_mac_stop(adpt); + + phy_print_status(phydev); +} + +/* Bringup the interface/HW */ +int emac_mac_up(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + struct emac_irq *irq = &adpt->irq; + int ret; + + emac_mac_rx_tx_ring_reset_all(adpt); + emac_mac_config(adpt); + + ret = request_irq(irq->irq, emac_isr, 0, EMAC_MAC_IRQ_RES, irq); + if (ret) { + netdev_err(adpt->netdev, "could not request %s irq\n", + EMAC_MAC_IRQ_RES); + return ret; + } + + emac_mac_rx_descs_refill(adpt, &adpt->rx_q); + + ret = phy_connect_direct(netdev, adpt->phydev, emac_adjust_link, + PHY_INTERFACE_MODE_SGMII); + if (ret) { + netdev_err(adpt->netdev, "could not connect phy\n"); + free_irq(irq->irq, irq); + return ret; + } + + /* enable mac irq */ + writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS); + writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK); + + adpt->phydev->irq = PHY_IGNORE_INTERRUPT; + phy_start(adpt->phydev); + + napi_enable(&adpt->rx_q.napi); + netif_start_queue(netdev); + + return 0; +} + +/* Bring down the interface/HW */ +void emac_mac_down(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + + netif_stop_queue(netdev); + napi_disable(&adpt->rx_q.napi); + + phy_stop(adpt->phydev); + phy_disconnect(adpt->phydev); + + /* disable mac irq */ + writel(DIS_INT, adpt->base + EMAC_INT_STATUS); + writel(0, adpt->base + EMAC_INT_MASK); + synchronize_irq(adpt->irq.irq); + free_irq(adpt->irq.irq, &adpt->irq); + + emac_mac_reset(adpt); + + emac_tx_q_descs_free(adpt); + netdev_reset_queue(adpt->netdev); + emac_rx_q_free_descs(adpt); +} + +/* Consume next received packet descriptor */ +static bool emac_rx_process_rrd(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q, + struct emac_rrd *rrd) +{ + u32 *hw_rrd = EMAC_RRD(rx_q, adpt->rrd_size, rx_q->rrd.consume_idx); + + rrd->word[3] = *(hw_rrd + 3); + + if (!RRD_UPDT(rrd)) + return false; + + rrd->word[4] = 0; + rrd->word[5] = 0; + + rrd->word[0] = *(hw_rrd++); + rrd->word[1] = *(hw_rrd++); + rrd->word[2] = *(hw_rrd++); + + if (unlikely(RRD_NOR(rrd) != 1)) { + netdev_err(adpt->netdev, + "error: multi-RFD not support yet! nor:%lu\n", + RRD_NOR(rrd)); + } + + /* mark rrd as processed */ + RRD_UPDT_SET(rrd, 0); + *hw_rrd = rrd->word[3]; + + if (++rx_q->rrd.consume_idx == rx_q->rrd.count) + rx_q->rrd.consume_idx = 0; + + return true; +} + +/* Produce new transmit descriptor */ +static void emac_tx_tpd_create(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, struct emac_tpd *tpd) +{ + u32 *hw_tpd; + + tx_q->tpd.last_produce_idx = tx_q->tpd.produce_idx; + hw_tpd = EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.produce_idx); + + if (++tx_q->tpd.produce_idx == tx_q->tpd.count) + tx_q->tpd.produce_idx = 0; + + *(hw_tpd++) = tpd->word[0]; + *(hw_tpd++) = tpd->word[1]; + *(hw_tpd++) = tpd->word[2]; + *hw_tpd = tpd->word[3]; +} + +/* Mark the last transmit descriptor as such (for the transmit packet) */ +static void emac_tx_tpd_mark_last(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q) +{ + u32 *hw_tpd = + EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.last_produce_idx); + u32 tmp_tpd; + + tmp_tpd = *(hw_tpd + 1); + tmp_tpd |= EMAC_TPD_LAST_FRAGMENT; + *(hw_tpd + 1) = tmp_tpd; +} + +static void emac_rx_rfd_clean(struct emac_rx_queue *rx_q, struct emac_rrd *rrd) +{ + struct emac_buffer *rfbuf = rx_q->rfd.rfbuff; + u32 consume_idx = RRD_SI(rrd); + unsigned int i; + + for (i = 0; i < RRD_NOR(rrd); i++) { + rfbuf[consume_idx].skb = NULL; + if (++consume_idx == rx_q->rfd.count) + consume_idx = 0; + } + + rx_q->rfd.consume_idx = consume_idx; + rx_q->rfd.process_idx = consume_idx; +} + +/* Push the received skb to upper layers */ +static void emac_receive_skb(struct emac_rx_queue *rx_q, + struct sk_buff *skb, + u16 vlan_tag, bool vlan_flag) +{ + if (vlan_flag) { + u16 vlan; + + EMAC_TAG_TO_VLAN(vlan_tag, vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); + } + + napi_gro_receive(&rx_q->napi, skb); +} + +/* Process receive event */ +void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q, + int *num_pkts, int max_pkts) +{ + u32 proc_idx, hw_consume_idx, num_consume_pkts; + struct net_device *netdev = adpt->netdev; + struct emac_buffer *rfbuf; + unsigned int count = 0; + struct emac_rrd rrd; + struct sk_buff *skb; + u32 reg; + + reg = readl_relaxed(adpt->base + rx_q->consume_reg); + + hw_consume_idx = (reg & rx_q->consume_mask) >> rx_q->consume_shift; + num_consume_pkts = (hw_consume_idx >= rx_q->rrd.consume_idx) ? + (hw_consume_idx - rx_q->rrd.consume_idx) : + (hw_consume_idx + rx_q->rrd.count - rx_q->rrd.consume_idx); + + do { + if (!num_consume_pkts) + break; + + if (!emac_rx_process_rrd(adpt, rx_q, &rrd)) + break; + + if (likely(RRD_NOR(&rrd) == 1)) { + /* good receive */ + rfbuf = GET_RFD_BUFFER(rx_q, RRD_SI(&rrd)); + dma_unmap_single(adpt->netdev->dev.parent, + rfbuf->dma_addr, rfbuf->length, + DMA_FROM_DEVICE); + rfbuf->dma_addr = 0; + skb = rfbuf->skb; + } else { + netdev_err(adpt->netdev, + "error: multi-RFD not support yet!\n"); + break; + } + emac_rx_rfd_clean(rx_q, &rrd); + num_consume_pkts--; + count++; + + /* Due to a HW issue in L4 check sum detection (UDP/TCP frags + * with DF set are marked as error), drop packets based on the + * error mask rather than the summary bit (ignoring L4F errors) + */ + if (rrd.word[EMAC_RRD_STATS_DW_IDX] & EMAC_RRD_ERROR) { + netif_dbg(adpt, rx_status, adpt->netdev, + "Drop error packet[RRD: 0x%x:0x%x:0x%x:0x%x]\n", + rrd.word[0], rrd.word[1], + rrd.word[2], rrd.word[3]); + + dev_kfree_skb(skb); + continue; + } + + skb_put(skb, RRD_PKT_SIZE(&rrd) - ETH_FCS_LEN); + skb->dev = netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + if (netdev->features & NETIF_F_RXCSUM) + skb->ip_summed = RRD_L4F(&rrd) ? + CHECKSUM_NONE : CHECKSUM_UNNECESSARY; + else + skb_checksum_none_assert(skb); + + emac_receive_skb(rx_q, skb, (u16)RRD_CVALN_TAG(&rrd), + (bool)RRD_CVTAG(&rrd)); + + netdev->last_rx = jiffies; + (*num_pkts)++; + } while (*num_pkts < max_pkts); + + if (count) { + proc_idx = (rx_q->rfd.process_idx << rx_q->process_shft) & + rx_q->process_mask; + emac_reg_update32(adpt->base + rx_q->process_reg, + rx_q->process_mask, proc_idx); + emac_mac_rx_descs_refill(adpt, rx_q); + } +} + +/* get the number of free transmit descriptors */ +static unsigned int emac_tpd_num_free_descs(struct emac_tx_queue *tx_q) +{ + u32 produce_idx = tx_q->tpd.produce_idx; + u32 consume_idx = tx_q->tpd.consume_idx; + + return (consume_idx > produce_idx) ? + (consume_idx - produce_idx - 1) : + (tx_q->tpd.count + consume_idx - produce_idx - 1); +} + +/* Process transmit event */ +void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) +{ + u32 reg = readl_relaxed(adpt->base + tx_q->consume_reg); + u32 hw_consume_idx, pkts_compl = 0, bytes_compl = 0; + struct emac_buffer *tpbuf; + + hw_consume_idx = (reg & tx_q->consume_mask) >> tx_q->consume_shift; + + while (tx_q->tpd.consume_idx != hw_consume_idx) { + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx); + if (tpbuf->dma_addr) { + dma_unmap_single(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + } + + if (tpbuf->skb) { + pkts_compl++; + bytes_compl += tpbuf->skb->len; + dev_kfree_skb_irq(tpbuf->skb); + tpbuf->skb = NULL; + } + + if (++tx_q->tpd.consume_idx == tx_q->tpd.count) + tx_q->tpd.consume_idx = 0; + } + + netdev_completed_queue(adpt->netdev, pkts_compl, bytes_compl); + + if (netif_queue_stopped(adpt->netdev)) + if (emac_tpd_num_free_descs(tx_q) > (MAX_SKB_FRAGS + 1)) + netif_wake_queue(adpt->netdev); +} + +/* Initialize all queue data structures */ +void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + adpt->rx_q.netdev = adpt->netdev; + + adpt->rx_q.produce_reg = EMAC_MAILBOX_0; + adpt->rx_q.produce_mask = RFD0_PROD_IDX_BMSK; + adpt->rx_q.produce_shift = RFD0_PROD_IDX_SHFT; + + adpt->rx_q.process_reg = EMAC_MAILBOX_0; + adpt->rx_q.process_mask = RFD0_PROC_IDX_BMSK; + adpt->rx_q.process_shft = RFD0_PROC_IDX_SHFT; + + adpt->rx_q.consume_reg = EMAC_MAILBOX_3; + adpt->rx_q.consume_mask = RFD0_CONS_IDX_BMSK; + adpt->rx_q.consume_shift = RFD0_CONS_IDX_SHFT; + + adpt->rx_q.irq = &adpt->irq; + adpt->rx_q.intr = adpt->irq.mask & ISR_RX_PKT; + + adpt->tx_q.produce_reg = EMAC_MAILBOX_15; + adpt->tx_q.produce_mask = NTPD_PROD_IDX_BMSK; + adpt->tx_q.produce_shift = NTPD_PROD_IDX_SHFT; + + adpt->tx_q.consume_reg = EMAC_MAILBOX_2; + adpt->tx_q.consume_mask = NTPD_CONS_IDX_BMSK; + adpt->tx_q.consume_shift = NTPD_CONS_IDX_SHFT; +} + +/* Fill up transmit descriptors with TSO and Checksum offload information */ +static int emac_tso_csum(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, + struct sk_buff *skb, + struct emac_tpd *tpd) +{ + unsigned int hdr_len; + int ret; + + if (skb_is_gso(skb)) { + if (skb_header_cloned(skb)) { + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (unlikely(ret)) + return ret; + } + + if (skb->protocol == htons(ETH_P_IP)) { + u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data) + + ntohs(ip_hdr(skb)->tot_len); + if (skb->len > pkt_len) + pskb_trim(skb, pkt_len); + } + + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + if (unlikely(skb->len == hdr_len)) { + /* we only need to do csum */ + netif_warn(adpt, tx_err, adpt->netdev, + "tso not needed for packet with 0 data\n"); + goto do_csum; + } + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + TPD_IPV4_SET(tpd, 1); + } + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { + /* ipv6 tso need an extra tpd */ + struct emac_tpd extra_tpd; + + memset(tpd, 0, sizeof(*tpd)); + memset(&extra_tpd, 0, sizeof(extra_tpd)); + + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + TPD_PKT_LEN_SET(&extra_tpd, skb->len); + TPD_LSO_SET(&extra_tpd, 1); + TPD_LSOV_SET(&extra_tpd, 1); + emac_tx_tpd_create(adpt, tx_q, &extra_tpd); + TPD_LSOV_SET(tpd, 1); + } + + TPD_LSO_SET(tpd, 1); + TPD_TCPHDR_OFFSET_SET(tpd, skb_transport_offset(skb)); + TPD_MSS_SET(tpd, skb_shinfo(skb)->gso_size); + return 0; + } + +do_csum: + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + unsigned int css, cso; + + cso = skb_transport_offset(skb); + if (unlikely(cso & 0x1)) { + netdev_err(adpt->netdev, + "error: payload offset should be even\n"); + return -EINVAL; + } + css = cso + skb->csum_offset; + + TPD_PAYLOAD_OFFSET_SET(tpd, cso >> 1); + TPD_CXSUM_OFFSET_SET(tpd, css >> 1); + TPD_CSX_SET(tpd, 1); + } + + return 0; +} + +/* Fill up transmit descriptors */ +static void emac_tx_fill_tpd(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, struct sk_buff *skb, + struct emac_tpd *tpd) +{ + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + unsigned int first = tx_q->tpd.produce_idx; + unsigned int len = skb_headlen(skb); + struct emac_buffer *tpbuf = NULL; + unsigned int mapped_len = 0; + unsigned int i; + int count = 0; + int ret; + + /* if Large Segment Offload is (in TCP Segmentation Offload struct) */ + if (TPD_LSO(tpd)) { + mapped_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = mapped_len; + tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, + skb->data, tpbuf->length, + DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + if (mapped_len < len) { + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = len - mapped_len; + tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, + skb->data + mapped_len, + tpbuf->length, DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + for (i = 0; i < nr_frags; i++) { + struct skb_frag_struct *frag; + + frag = &skb_shinfo(skb)->frags[i]; + + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = frag->size; + tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent, + frag->page.p, frag->page_offset, + tpbuf->length, DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + /* The last tpd */ + wmb(); + emac_tx_tpd_mark_last(adpt, tx_q); + + /* The last buffer info contain the skb address, + * so it will be freed after unmap + */ + tpbuf->skb = skb; + + return; + +error: + /* One of the memory mappings failed, so undo everything */ + tx_q->tpd.produce_idx = first; + + while (count--) { + tpbuf = GET_TPD_BUFFER(tx_q, first); + dma_unmap_page(adpt->netdev->dev.parent, tpbuf->dma_addr, + tpbuf->length, DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + tpbuf->length = 0; + + if (++first == tx_q->tpd.count) + first = 0; + } + + dev_kfree_skb(skb); +} + +/* Transmit the packet using specified transmit queue */ +int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, + struct sk_buff *skb) +{ + struct emac_tpd tpd; + u32 prod_idx; + + memset(&tpd, 0, sizeof(tpd)); + + if (emac_tso_csum(adpt, tx_q, skb, &tpd) != 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + if (skb_vlan_tag_present(skb)) { + u16 tag; + + EMAC_VLAN_TO_TAG(skb_vlan_tag_get(skb), tag); + TPD_CVLAN_TAG_SET(&tpd, tag); + TPD_INSTC_SET(&tpd, 1); + } + + if (skb_network_offset(skb) != ETH_HLEN) + TPD_TYP_SET(&tpd, 1); + + emac_tx_fill_tpd(adpt, tx_q, skb, &tpd); + + netdev_sent_queue(adpt->netdev, skb->len); + + /* Make sure the are enough free descriptors to hold one + * maximum-sized SKB. We need one desc for each fragment, + * one for the checksum (emac_tso_csum), one for TSO, and + * and one for the SKB header. + */ + if (emac_tpd_num_free_descs(tx_q) < (MAX_SKB_FRAGS + 3)) + netif_stop_queue(adpt->netdev); + + /* update produce idx */ + prod_idx = (tx_q->tpd.produce_idx << tx_q->produce_shift) & + tx_q->produce_mask; + emac_reg_update32(adpt->base + tx_q->produce_reg, + tx_q->produce_mask, prod_idx); + + return NETDEV_TX_OK; +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h new file mode 100644 index 000000000000..f3aa24dc4a29 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h @@ -0,0 +1,248 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* EMAC DMA HW engine uses three rings: + * Tx: + * TPD: Transmit Packet Descriptor ring. + * Rx: + * RFD: Receive Free Descriptor ring. + * Ring of descriptors with empty buffers to be filled by Rx HW. + * RRD: Receive Return Descriptor ring. + * Ring of descriptors with buffers filled with received data. + */ + +#ifndef _EMAC_HW_H_ +#define _EMAC_HW_H_ + +/* EMAC_CSR register offsets */ +#define EMAC_EMAC_WRAPPER_CSR1 0x000000 +#define EMAC_EMAC_WRAPPER_CSR2 0x000004 +#define EMAC_EMAC_WRAPPER_TX_TS_LO 0x000104 +#define EMAC_EMAC_WRAPPER_TX_TS_HI 0x000108 +#define EMAC_EMAC_WRAPPER_TX_TS_INX 0x00010c + +/* DMA Order Settings */ +enum emac_dma_order { + emac_dma_ord_in = 1, + emac_dma_ord_enh = 2, + emac_dma_ord_out = 4 +}; + +enum emac_dma_req_block { + emac_dma_req_128 = 0, + emac_dma_req_256 = 1, + emac_dma_req_512 = 2, + emac_dma_req_1024 = 3, + emac_dma_req_2048 = 4, + emac_dma_req_4096 = 5 +}; + +/* Returns the value of bits idx...idx+n_bits */ +#define BITS_GET(val, lo, hi) ((le32_to_cpu(val) & GENMASK((hi), (lo))) >> lo) +#define BITS_SET(val, lo, hi, new_val) \ + val = cpu_to_le32((le32_to_cpu(val) & (~GENMASK((hi), (lo)))) | \ + (((new_val) << (lo)) & GENMASK((hi), (lo)))) + +/* RRD (Receive Return Descriptor) */ +struct emac_rrd { + u32 word[6]; + +/* number of RFD */ +#define RRD_NOR(rrd) BITS_GET((rrd)->word[0], 16, 19) +/* start consumer index of rfd-ring */ +#define RRD_SI(rrd) BITS_GET((rrd)->word[0], 20, 31) +/* vlan-tag (CVID, CFI and PRI) */ +#define RRD_CVALN_TAG(rrd) BITS_GET((rrd)->word[2], 0, 15) +/* length of the packet */ +#define RRD_PKT_SIZE(rrd) BITS_GET((rrd)->word[3], 0, 13) +/* L4(TCP/UDP) checksum failed */ +#define RRD_L4F(rrd) BITS_GET((rrd)->word[3], 14, 14) +/* vlan tagged */ +#define RRD_CVTAG(rrd) BITS_GET((rrd)->word[3], 16, 16) +/* When set, indicates that the descriptor is updated by the IP core. + * When cleared, indicates that the descriptor is invalid. + */ +#define RRD_UPDT(rrd) BITS_GET((rrd)->word[3], 31, 31) +#define RRD_UPDT_SET(rrd, val) BITS_SET((rrd)->word[3], 31, 31, val) +/* timestamp low */ +#define RRD_TS_LOW(rrd) BITS_GET((rrd)->word[4], 0, 29) +/* timestamp high */ +#define RRD_TS_HI(rrd) le32_to_cpu((rrd)->word[5]) +}; + +/* TPD (Transmit Packet Descriptor) */ +struct emac_tpd { + u32 word[4]; + +/* Number of bytes of the transmit packet. (include 4-byte CRC) */ +#define TPD_BUF_LEN_SET(tpd, val) BITS_SET((tpd)->word[0], 0, 15, val) +/* Custom Checksum Offload: When set, ask IP core to offload custom checksum */ +#define TPD_CSX_SET(tpd, val) BITS_SET((tpd)->word[1], 8, 8, val) +/* TCP Large Send Offload: When set, ask IP core to do offload TCP Large Send */ +#define TPD_LSO(tpd) BITS_GET((tpd)->word[1], 12, 12) +#define TPD_LSO_SET(tpd, val) BITS_SET((tpd)->word[1], 12, 12, val) +/* Large Send Offload Version: When set, indicates this is an LSOv2 + * (for both IPv4 and IPv6). When cleared, indicates this is an LSOv1 + * (only for IPv4). + */ +#define TPD_LSOV_SET(tpd, val) BITS_SET((tpd)->word[1], 13, 13, val) +/* IPv4 packet: When set, indicates this is an IPv4 packet, this bit is only + * for LSOV2 format. + */ +#define TPD_IPV4_SET(tpd, val) BITS_SET((tpd)->word[1], 16, 16, val) +/* 0: Ethernet frame (DA+SA+TYPE+DATA+CRC) + * 1: IEEE 802.3 frame (DA+SA+LEN+DSAP+SSAP+CTL+ORG+TYPE+DATA+CRC) + */ +#define TPD_TYP_SET(tpd, val) BITS_SET((tpd)->word[1], 17, 17, val) +/* Low-32bit Buffer Address */ +#define TPD_BUFFER_ADDR_L_SET(tpd, val) ((tpd)->word[2] = cpu_to_le32(val)) +/* CVLAN Tag to be inserted if INS_VLAN_TAG is set, CVLAN TPID based on global + * register configuration. + */ +#define TPD_CVLAN_TAG_SET(tpd, val) BITS_SET((tpd)->word[3], 0, 15, val) +/* Insert CVlan Tag: When set, ask MAC to insert CVLAN TAG to outgoing packet + */ +#define TPD_INSTC_SET(tpd, val) BITS_SET((tpd)->word[3], 17, 17, val) +/* High-14bit Buffer Address, So, the 64b-bit address is + * {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L} + */ +#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 30, val) +/* Format D. Word offset from the 1st byte of this packet to start to calculate + * the custom checksum. + */ +#define TPD_PAYLOAD_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val) +/* Format D. Word offset from the 1st byte of this packet to fill the custom + * checksum to + */ +#define TPD_CXSUM_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 18, 25, val) + +/* Format C. TCP Header offset from the 1st byte of this packet. (byte unit) */ +#define TPD_TCPHDR_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val) +/* Format C. MSS (Maximum Segment Size) got from the protocol layer. (byte unit) + */ +#define TPD_MSS_SET(tpd, val) BITS_SET((tpd)->word[1], 18, 30, val) +/* packet length in ext tpd */ +#define TPD_PKT_LEN_SET(tpd, val) ((tpd)->word[2] = cpu_to_le32(val)) +}; + +/* emac_ring_header represents a single, contiguous block of DMA space + * mapped for the three descriptor rings (tpd, rfd, rrd) + */ +struct emac_ring_header { + void *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + size_t size; /* length in bytes */ + size_t used; +}; + +/* emac_buffer is wrapper around a pointer to a socket buffer + * so a DMA handle can be stored along with the skb + */ +struct emac_buffer { + struct sk_buff *skb; /* socket buffer */ + u16 length; /* rx buffer length */ + dma_addr_t dma_addr; /* dma address */ +}; + +/* receive free descriptor (rfd) ring */ +struct emac_rfd_ring { + struct emac_buffer *rfbuff; + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; + unsigned int process_idx; + unsigned int consume_idx; /* unused */ +}; + +/* Receive Return Desciptor (RRD) ring */ +struct emac_rrd_ring { + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* physical address */ + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; /* unused */ + unsigned int consume_idx; +}; + +/* Rx queue */ +struct emac_rx_queue { + struct net_device *netdev; /* netdev ring belongs to */ + struct emac_rrd_ring rrd; + struct emac_rfd_ring rfd; + struct napi_struct napi; + struct emac_irq *irq; + + u32 intr; + u32 produce_mask; + u32 process_mask; + u32 consume_mask; + + u16 produce_reg; + u16 process_reg; + u16 consume_reg; + + u8 produce_shift; + u8 process_shft; + u8 consume_shift; +}; + +/* Transimit Packet Descriptor (tpd) ring */ +struct emac_tpd_ring { + struct emac_buffer *tpbuff; + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; + unsigned int consume_idx; + unsigned int last_produce_idx; +}; + +/* Tx queue */ +struct emac_tx_queue { + struct emac_tpd_ring tpd; + + u32 produce_mask; + u32 consume_mask; + + u16 max_packets; /* max packets per interrupt */ + u16 produce_reg; + u16 consume_reg; + + u8 produce_shift; + u8 consume_shift; +}; + +struct emac_adapter; + +int emac_mac_up(struct emac_adapter *adpt); +void emac_mac_down(struct emac_adapter *adpt); +void emac_mac_reset(struct emac_adapter *adpt); +void emac_mac_start(struct emac_adapter *adpt); +void emac_mac_stop(struct emac_adapter *adpt); +void emac_mac_mode_config(struct emac_adapter *adpt); +void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q, + int *num_pkts, int max_pkts); +int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, + struct sk_buff *skb); +void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q); +void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev, + struct emac_adapter *adpt); +int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt); +void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt); +void emac_mac_multicast_addr_clear(struct emac_adapter *adpt); +void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr); + +#endif /*_EMAC_HW_H_*/ diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c new file mode 100644 index 000000000000..c412ba9a27e7 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c @@ -0,0 +1,204 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC PHY Controller driver. + */ + +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-phy.h" +#include "emac-sgmii.h" + +/* EMAC base register offsets */ +#define EMAC_MDIO_CTRL 0x001414 +#define EMAC_PHY_STS 0x001418 +#define EMAC_MDIO_EX_CTRL 0x001440 + +/* EMAC_MDIO_CTRL */ +#define MDIO_MODE BIT(30) +#define MDIO_PR BIT(29) +#define MDIO_AP_EN BIT(28) +#define MDIO_BUSY BIT(27) +#define MDIO_CLK_SEL_BMSK 0x7000000 +#define MDIO_CLK_SEL_SHFT 24 +#define MDIO_START BIT(23) +#define SUP_PREAMBLE BIT(22) +#define MDIO_RD_NWR BIT(21) +#define MDIO_REG_ADDR_BMSK 0x1f0000 +#define MDIO_REG_ADDR_SHFT 16 +#define MDIO_DATA_BMSK 0xffff +#define MDIO_DATA_SHFT 0 + +/* EMAC_PHY_STS */ +#define PHY_ADDR_BMSK 0x1f0000 +#define PHY_ADDR_SHFT 16 + +#define MDIO_CLK_25_4 0 +#define MDIO_CLK_25_28 7 + +#define MDIO_WAIT_TIMES 1000 + +#define EMAC_LINK_SPEED_DEFAULT (\ + EMAC_LINK_SPEED_10_HALF |\ + EMAC_LINK_SPEED_10_FULL |\ + EMAC_LINK_SPEED_100_HALF |\ + EMAC_LINK_SPEED_100_FULL |\ + EMAC_LINK_SPEED_1GB_FULL) + +/** + * emac_phy_mdio_autopoll_disable() - disable mdio autopoll + * @adpt: the emac adapter + * + * The autopoll feature takes over the MDIO bus. In order for + * the PHY driver to be able to talk to the PHY over the MDIO + * bus, we need to temporarily disable the autopoll feature. + */ +static int emac_phy_mdio_autopoll_disable(struct emac_adapter *adpt) +{ + u32 val; + + /* disable autopoll */ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, MDIO_AP_EN, 0); + + /* wait for any mdio polling to complete */ + if (!readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, val, + !(val & MDIO_BUSY), 100, MDIO_WAIT_TIMES * 100)) + return 0; + + /* failed to disable; ensure it is enabled before returning */ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN); + + return -EBUSY; +} + +/** + * emac_phy_mdio_autopoll_disable() - disable mdio autopoll + * @adpt: the emac adapter + * + * The EMAC has the ability to poll the external PHY on the MDIO + * bus for link state changes. This eliminates the need for the + * driver to poll the phy. If if the link state does change, + * the EMAC issues an interrupt on behalf of the PHY. + */ +static void emac_phy_mdio_autopoll_enable(struct emac_adapter *adpt) +{ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN); +} + +static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum) +{ + struct emac_adapter *adpt = bus->priv; + u32 reg; + int ret; + + ret = emac_phy_mdio_autopoll_disable(adpt); + if (ret) + return ret; + + emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK, + (addr << PHY_ADDR_SHFT)); + + reg = SUP_PREAMBLE | + ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) | + ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) | + MDIO_START | MDIO_RD_NWR; + + writel(reg, adpt->base + EMAC_MDIO_CTRL); + + if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, + !(reg & (MDIO_START | MDIO_BUSY)), + 100, MDIO_WAIT_TIMES * 100)) + ret = -EIO; + else + ret = (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK; + + emac_phy_mdio_autopoll_enable(adpt); + + return ret; +} + +static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) +{ + struct emac_adapter *adpt = bus->priv; + u32 reg; + int ret; + + ret = emac_phy_mdio_autopoll_disable(adpt); + if (ret) + return ret; + + emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK, + (addr << PHY_ADDR_SHFT)); + + reg = SUP_PREAMBLE | + ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) | + ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) | + ((val << MDIO_DATA_SHFT) & MDIO_DATA_BMSK) | + MDIO_START; + + writel(reg, adpt->base + EMAC_MDIO_CTRL); + + if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, + !(reg & (MDIO_START | MDIO_BUSY)), 100, + MDIO_WAIT_TIMES * 100)) + ret = -EIO; + + emac_phy_mdio_autopoll_enable(adpt); + + return ret; +} + +/* Configure the MDIO bus and connect the external PHY */ +int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) +{ + struct device_node *np = pdev->dev.of_node; + struct device_node *phy_np; + struct mii_bus *mii_bus; + int ret; + + /* Create the mii_bus object for talking to the MDIO bus */ + adpt->mii_bus = mii_bus = devm_mdiobus_alloc(&pdev->dev); + if (!mii_bus) + return -ENOMEM; + + mii_bus->name = "emac-mdio"; + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); + mii_bus->read = emac_mdio_read; + mii_bus->write = emac_mdio_write; + mii_bus->parent = &pdev->dev; + mii_bus->priv = adpt; + + ret = of_mdiobus_register(mii_bus, np); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + + phy_np = of_parse_phandle(np, "phy-handle", 0); + adpt->phydev = of_phy_find_device(phy_np); + if (!adpt->phydev) { + dev_err(&pdev->dev, "could not find external phy\n"); + mdiobus_unregister(mii_bus); + return -ENODEV; + } + + if (adpt->phydev->drv) + phy_attached_print(adpt->phydev, NULL); + + return 0; +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.h b/drivers/net/ethernet/qualcomm/emac/emac-phy.h new file mode 100644 index 000000000000..49f3701a6dd7 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 and +* only version 2 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +*/ + +#ifndef _EMAC_PHY_H_ +#define _EMAC_PHY_H_ + +typedef int (*emac_sgmii_initialize)(struct emac_adapter *adpt); + +/** emac_phy - internal emac phy + * @base base address + * @digital per-lane digital block + * @initialize initialization function + */ +struct emac_phy { + void __iomem *base; + void __iomem *digital; + emac_sgmii_initialize initialize; +}; + +struct emac_adapter; + +int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt); + +#endif /* _EMAC_PHY_H_ */ diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c new file mode 100644 index 000000000000..6ab0a3c96431 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -0,0 +1,721 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC SGMII Controller driver. + */ + +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-sgmii.h" + +/* EMAC_QSERDES register offsets */ +#define EMAC_QSERDES_COM_SYS_CLK_CTRL 0x000000 +#define EMAC_QSERDES_COM_PLL_CNTRL 0x000014 +#define EMAC_QSERDES_COM_PLL_IP_SETI 0x000018 +#define EMAC_QSERDES_COM_PLL_CP_SETI 0x000024 +#define EMAC_QSERDES_COM_PLL_IP_SETP 0x000028 +#define EMAC_QSERDES_COM_PLL_CP_SETP 0x00002c +#define EMAC_QSERDES_COM_SYSCLK_EN_SEL 0x000038 +#define EMAC_QSERDES_COM_RESETSM_CNTRL 0x000040 +#define EMAC_QSERDES_COM_PLLLOCK_CMP1 0x000044 +#define EMAC_QSERDES_COM_PLLLOCK_CMP2 0x000048 +#define EMAC_QSERDES_COM_PLLLOCK_CMP3 0x00004c +#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN 0x000050 +#define EMAC_QSERDES_COM_DEC_START1 0x000064 +#define EMAC_QSERDES_COM_DIV_FRAC_START1 0x000098 +#define EMAC_QSERDES_COM_DIV_FRAC_START2 0x00009c +#define EMAC_QSERDES_COM_DIV_FRAC_START3 0x0000a0 +#define EMAC_QSERDES_COM_DEC_START2 0x0000a4 +#define EMAC_QSERDES_COM_PLL_CRCTRL 0x0000ac +#define EMAC_QSERDES_COM_RESET_SM 0x0000bc +#define EMAC_QSERDES_TX_BIST_MODE_LANENO 0x000100 +#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL 0x000108 +#define EMAC_QSERDES_TX_TX_DRV_LVL 0x00010c +#define EMAC_QSERDES_TX_LANE_MODE 0x000150 +#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN 0x000170 +#define EMAC_QSERDES_RX_CDR_CONTROL 0x000200 +#define EMAC_QSERDES_RX_CDR_CONTROL2 0x000210 +#define EMAC_QSERDES_RX_RX_EQ_GAIN12 0x000230 + +/* EMAC_SGMII register offsets */ +#define EMAC_SGMII_PHY_SERDES_START 0x000000 +#define EMAC_SGMII_PHY_CMN_PWR_CTRL 0x000004 +#define EMAC_SGMII_PHY_RX_PWR_CTRL 0x000008 +#define EMAC_SGMII_PHY_TX_PWR_CTRL 0x00000C +#define EMAC_SGMII_PHY_LANE_CTRL1 0x000018 +#define EMAC_SGMII_PHY_AUTONEG_CFG2 0x000048 +#define EMAC_SGMII_PHY_CDR_CTRL0 0x000058 +#define EMAC_SGMII_PHY_SPEED_CFG1 0x000074 +#define EMAC_SGMII_PHY_POW_DWN_CTRL0 0x000080 +#define EMAC_SGMII_PHY_RESET_CTRL 0x0000a8 +#define EMAC_SGMII_PHY_IRQ_CMD 0x0000ac +#define EMAC_SGMII_PHY_INTERRUPT_CLEAR 0x0000b0 +#define EMAC_SGMII_PHY_INTERRUPT_MASK 0x0000b4 +#define EMAC_SGMII_PHY_INTERRUPT_STATUS 0x0000b8 +#define EMAC_SGMII_PHY_RX_CHK_STATUS 0x0000d4 +#define EMAC_SGMII_PHY_AUTONEG0_STATUS 0x0000e0 +#define EMAC_SGMII_PHY_AUTONEG1_STATUS 0x0000e4 + +/* EMAC_QSERDES_COM_PLL_IP_SETI */ +#define PLL_IPSETI(x) ((x) & 0x3f) + +/* EMAC_QSERDES_COM_PLL_CP_SETI */ +#define PLL_CPSETI(x) ((x) & 0xff) + +/* EMAC_QSERDES_COM_PLL_IP_SETP */ +#define PLL_IPSETP(x) ((x) & 0x3f) + +/* EMAC_QSERDES_COM_PLL_CP_SETP */ +#define PLL_CPSETP(x) ((x) & 0x1f) + +/* EMAC_QSERDES_COM_PLL_CRCTRL */ +#define PLL_RCTRL(x) (((x) & 0xf) << 4) +#define PLL_CCTRL(x) ((x) & 0xf) + +/* SGMII v2 PHY registers per lane */ +#define EMAC_SGMII_PHY_LN_OFFSET 0x0400 + +/* SGMII v2 digital lane registers */ +#define EMAC_SGMII_LN_DRVR_CTRL0 0x00C +#define EMAC_SGMII_LN_DRVR_TAP_EN 0x018 +#define EMAC_SGMII_LN_TX_MARGINING 0x01C +#define EMAC_SGMII_LN_TX_PRE 0x020 +#define EMAC_SGMII_LN_TX_POST 0x024 +#define EMAC_SGMII_LN_TX_BAND_MODE 0x060 +#define EMAC_SGMII_LN_LANE_MODE 0x064 +#define EMAC_SGMII_LN_PARALLEL_RATE 0x078 +#define EMAC_SGMII_LN_CML_CTRL_MODE0 0x0B8 +#define EMAC_SGMII_LN_MIXER_CTRL_MODE0 0x0D0 +#define EMAC_SGMII_LN_VGA_INITVAL 0x134 +#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0 0x17C +#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0 0x188 +#define EMAC_SGMII_LN_UCDR_SO_CONFIG 0x194 +#define EMAC_SGMII_LN_RX_BAND 0x19C +#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0 0x1B8 +#define EMAC_SGMII_LN_RSM_CONFIG 0x1F0 +#define EMAC_SGMII_LN_SIGDET_ENABLES 0x224 +#define EMAC_SGMII_LN_SIGDET_CNTRL 0x228 +#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL 0x22C +#define EMAC_SGMII_LN_RX_EN_SIGNAL 0x2A0 +#define EMAC_SGMII_LN_RX_MISC_CNTRL0 0x2AC +#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV 0x2BC + +/* SGMII v2 digital lane register values */ +#define UCDR_STEP_BY_TWO_MODE0 BIT(7) +#define UCDR_xO_GAIN_MODE(x) ((x) & 0x7f) +#define UCDR_ENABLE BIT(6) +#define UCDR_SO_SATURATION(x) ((x) & 0x3f) +#define SIGDET_LP_BYP_PS4 BIT(7) +#define SIGDET_EN_PS0_TO_PS2 BIT(6) +#define EN_ACCOUPLEVCM_SW_MUX BIT(5) +#define EN_ACCOUPLEVCM_SW BIT(4) +#define RX_SYNC_EN BIT(3) +#define RXTERM_HIGHZ_PS5 BIT(2) +#define SIGDET_EN_PS3 BIT(1) +#define EN_ACCOUPLE_VCM_PS3 BIT(0) +#define UFS_MODE BIT(5) +#define TXVAL_VALID_INIT BIT(4) +#define TXVAL_VALID_MUX BIT(3) +#define TXVAL_VALID BIT(2) +#define USB3P1_MODE BIT(1) +#define KR_PCIGEN3_MODE BIT(0) +#define PRE_EN BIT(3) +#define POST_EN BIT(2) +#define MAIN_EN_MUX BIT(1) +#define MAIN_EN BIT(0) +#define TX_MARGINING_MUX BIT(6) +#define TX_MARGINING(x) ((x) & 0x3f) +#define TX_PRE_MUX BIT(6) +#define TX_PRE(x) ((x) & 0x3f) +#define TX_POST_MUX BIT(6) +#define TX_POST(x) ((x) & 0x3f) +#define CML_GEAR_MODE(x) (((x) & 7) << 3) +#define CML2CMOS_IBOOST_MODE(x) ((x) & 7) +#define MIXER_LOADB_MODE(x) (((x) & 0xf) << 2) +#define MIXER_DATARATE_MODE(x) ((x) & 3) +#define VGA_THRESH_DFE(x) ((x) & 0x3f) +#define SIGDET_LP_BYP_PS0_TO_PS2 BIT(5) +#define SIGDET_LP_BYP_MUX BIT(4) +#define SIGDET_LP_BYP BIT(3) +#define SIGDET_EN_MUX BIT(2) +#define SIGDET_EN BIT(1) +#define SIGDET_FLT_BYP BIT(0) +#define SIGDET_LVL(x) (((x) & 0xf) << 4) +#define SIGDET_BW_CTRL(x) ((x) & 0xf) +#define SIGDET_DEGLITCH_CTRL(x) (((x) & 0xf) << 1) +#define SIGDET_DEGLITCH_BYP BIT(0) +#define INVERT_PCS_RX_CLK BIT(7) +#define PWM_EN BIT(6) +#define RXBIAS_SEL(x) (((x) & 0x3) << 4) +#define EBDAC_SIGN BIT(3) +#define EDAC_SIGN BIT(2) +#define EN_AUXTAP1SIGN_INVERT BIT(1) +#define EN_DAC_CHOPPING BIT(0) +#define DRVR_LOGIC_CLK_EN BIT(4) +#define DRVR_LOGIC_CLK_DIV(x) ((x) & 0xf) +#define PARALLEL_RATE_MODE2(x) (((x) & 0x3) << 4) +#define PARALLEL_RATE_MODE1(x) (((x) & 0x3) << 2) +#define PARALLEL_RATE_MODE0(x) ((x) & 0x3) +#define BAND_MODE2(x) (((x) & 0x3) << 4) +#define BAND_MODE1(x) (((x) & 0x3) << 2) +#define BAND_MODE0(x) ((x) & 0x3) +#define LANE_SYNC_MODE BIT(5) +#define LANE_MODE(x) ((x) & 0x1f) +#define CDR_PD_SEL_MODE0(x) (((x) & 0x3) << 5) +#define EN_DLL_MODE0 BIT(4) +#define EN_IQ_DCC_MODE0 BIT(3) +#define EN_IQCAL_MODE0 BIT(2) +#define EN_QPATH_MODE0 BIT(1) +#define EN_EPATH_MODE0 BIT(0) +#define FORCE_TSYNC_ACK BIT(7) +#define FORCE_CMN_ACK BIT(6) +#define FORCE_CMN_READY BIT(5) +#define EN_RCLK_DEGLITCH BIT(4) +#define BYPASS_RSM_CDR_RESET BIT(3) +#define BYPASS_RSM_TSYNC BIT(2) +#define BYPASS_RSM_SAMP_CAL BIT(1) +#define BYPASS_RSM_DLL_CAL BIT(0) + +/* EMAC_QSERDES_COM_SYS_CLK_CTRL */ +#define SYSCLK_CM BIT(4) +#define SYSCLK_AC_COUPLE BIT(3) + +/* EMAC_QSERDES_COM_PLL_CNTRL */ +#define OCP_EN BIT(5) +#define PLL_DIV_FFEN BIT(2) +#define PLL_DIV_ORD BIT(1) + +/* EMAC_QSERDES_COM_SYSCLK_EN_SEL */ +#define SYSCLK_SEL_CMOS BIT(3) + +/* EMAC_QSERDES_COM_RESETSM_CNTRL */ +#define FRQ_TUNE_MODE BIT(4) + +/* EMAC_QSERDES_COM_PLLLOCK_CMP_EN */ +#define PLLLOCK_CMP_EN BIT(0) + +/* EMAC_QSERDES_COM_DEC_START1 */ +#define DEC_START1_MUX BIT(7) +#define DEC_START1(x) ((x) & 0x7f) + +/* EMAC_QSERDES_COM_DIV_FRAC_START1 * EMAC_QSERDES_COM_DIV_FRAC_START2 */ +#define DIV_FRAC_START_MUX BIT(7) +#define DIV_FRAC_START(x) ((x) & 0x7f) + +/* EMAC_QSERDES_COM_DIV_FRAC_START3 */ +#define DIV_FRAC_START3_MUX BIT(4) +#define DIV_FRAC_START3(x) ((x) & 0xf) + +/* EMAC_QSERDES_COM_DEC_START2 */ +#define DEC_START2_MUX BIT(1) +#define DEC_START2 BIT(0) + +/* EMAC_QSERDES_COM_RESET_SM */ +#define READY BIT(5) + +/* EMAC_QSERDES_TX_TX_EMP_POST1_LVL */ +#define TX_EMP_POST1_LVL_MUX BIT(5) +#define TX_EMP_POST1_LVL(x) ((x) & 0x1f) +#define TX_EMP_POST1_LVL_BMSK 0x1f +#define TX_EMP_POST1_LVL_SHFT 0 + +/* EMAC_QSERDES_TX_TX_DRV_LVL */ +#define TX_DRV_LVL_MUX BIT(4) +#define TX_DRV_LVL(x) ((x) & 0xf) + +/* EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN */ +#define EMP_EN_MUX BIT(1) +#define EMP_EN BIT(0) + +/* EMAC_QSERDES_RX_CDR_CONTROL & EMAC_QSERDES_RX_CDR_CONTROL2 */ +#define HBW_PD_EN BIT(7) +#define SECONDORDERENABLE BIT(6) +#define FIRSTORDER_THRESH(x) (((x) & 0x7) << 3) +#define SECONDORDERGAIN(x) ((x) & 0x7) + +/* EMAC_QSERDES_RX_RX_EQ_GAIN12 */ +#define RX_EQ_GAIN2(x) (((x) & 0xf) << 4) +#define RX_EQ_GAIN1(x) ((x) & 0xf) + +/* EMAC_SGMII_PHY_SERDES_START */ +#define SERDES_START BIT(0) + +/* EMAC_SGMII_PHY_CMN_PWR_CTRL */ +#define BIAS_EN BIT(6) +#define PLL_EN BIT(5) +#define SYSCLK_EN BIT(4) +#define CLKBUF_L_EN BIT(3) +#define PLL_TXCLK_EN BIT(1) +#define PLL_RXCLK_EN BIT(0) + +/* EMAC_SGMII_PHY_RX_PWR_CTRL */ +#define L0_RX_SIGDET_EN BIT(7) +#define L0_RX_TERM_MODE(x) (((x) & 3) << 4) +#define L0_RX_I_EN BIT(1) + +/* EMAC_SGMII_PHY_TX_PWR_CTRL */ +#define L0_TX_EN BIT(5) +#define L0_CLKBUF_EN BIT(4) +#define L0_TRAN_BIAS_EN BIT(1) + +/* EMAC_SGMII_PHY_LANE_CTRL1 */ +#define L0_RX_EQUALIZE_ENABLE BIT(6) +#define L0_RESET_TSYNC_EN BIT(4) +#define L0_DRV_LVL(x) ((x) & 0xf) + +/* EMAC_SGMII_PHY_AUTONEG_CFG2 */ +#define FORCE_AN_TX_CFG BIT(5) +#define FORCE_AN_RX_CFG BIT(4) +#define AN_ENABLE BIT(0) + +/* EMAC_SGMII_PHY_SPEED_CFG1 */ +#define DUPLEX_MODE BIT(4) +#define SPDMODE_1000 BIT(1) +#define SPDMODE_100 BIT(0) +#define SPDMODE_10 0 +#define SPDMODE_BMSK 3 +#define SPDMODE_SHFT 0 + +/* EMAC_SGMII_PHY_POW_DWN_CTRL0 */ +#define PWRDN_B BIT(0) +#define CDR_MAX_CNT(x) ((x) & 0xff) + +/* EMAC_QSERDES_TX_BIST_MODE_LANENO */ +#define BIST_LANE_NUMBER(x) (((x) & 3) << 5) +#define BISTMODE(x) ((x) & 0x1f) + +/* EMAC_QSERDES_COM_PLLLOCK_CMPx */ +#define PLLLOCK_CMP(x) ((x) & 0xff) + +/* EMAC_SGMII_PHY_RESET_CTRL */ +#define PHY_SW_RESET BIT(0) + +/* EMAC_SGMII_PHY_IRQ_CMD */ +#define IRQ_GLOBAL_CLEAR BIT(0) + +/* EMAC_SGMII_PHY_INTERRUPT_MASK */ +#define DECODE_CODE_ERR BIT(7) +#define DECODE_DISP_ERR BIT(6) +#define PLL_UNLOCK BIT(5) +#define AN_ILLEGAL_TERM BIT(4) +#define SYNC_FAIL BIT(3) +#define AN_START BIT(2) +#define AN_END BIT(1) +#define AN_REQUEST BIT(0) + +#define SGMII_PHY_IRQ_CLR_WAIT_TIME 10 + +#define SGMII_PHY_INTERRUPT_ERR (\ + DECODE_CODE_ERR |\ + DECODE_DISP_ERR) + +#define SGMII_ISR_AN_MASK (\ + AN_REQUEST |\ + AN_START |\ + AN_END |\ + AN_ILLEGAL_TERM |\ + PLL_UNLOCK |\ + SYNC_FAIL) + +#define SGMII_ISR_MASK (\ + SGMII_PHY_INTERRUPT_ERR |\ + SGMII_ISR_AN_MASK) + +/* SGMII TX_CONFIG */ +#define TXCFG_LINK 0x8000 +#define TXCFG_MODE_BMSK 0x1c00 +#define TXCFG_1000_FULL 0x1800 +#define TXCFG_100_FULL 0x1400 +#define TXCFG_100_HALF 0x0400 +#define TXCFG_10_FULL 0x1000 +#define TXCFG_10_HALF 0x0000 + +#define SERDES_START_WAIT_TIMES 100 + +struct emac_reg_write { + unsigned int offset; + u32 val; +}; + +static void emac_reg_write_all(void __iomem *base, + const struct emac_reg_write *itr, size_t size) +{ + size_t i; + + for (i = 0; i < size; ++itr, ++i) + writel(itr->val, base + itr->offset); +} + +static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = { + {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)}, + {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B}, + {EMAC_SGMII_PHY_CMN_PWR_CTRL, + BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN}, + {EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN}, + {EMAC_SGMII_PHY_RX_PWR_CTRL, + L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN}, + {EMAC_SGMII_PHY_CMN_PWR_CTRL, + BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | + PLL_RXCLK_EN}, + {EMAC_SGMII_PHY_LANE_CTRL1, + L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)}, +}; + +static const struct emac_reg_write sysclk_refclk_setting[] = { + {EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS}, + {EMAC_QSERDES_COM_SYS_CLK_CTRL, SYSCLK_CM | SYSCLK_AC_COUPLE}, +}; + +static const struct emac_reg_write pll_setting[] = { + {EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)}, + {EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)}, + {EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)}, + {EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)}, + {EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)}, + {EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD}, + {EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)}, + {EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2}, + {EMAC_QSERDES_COM_DIV_FRAC_START1, + DIV_FRAC_START_MUX | DIV_FRAC_START(85)}, + {EMAC_QSERDES_COM_DIV_FRAC_START2, + DIV_FRAC_START_MUX | DIV_FRAC_START(42)}, + {EMAC_QSERDES_COM_DIV_FRAC_START3, + DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN}, + {EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE}, +}; + +static const struct emac_reg_write cdr_setting[] = { + {EMAC_QSERDES_RX_CDR_CONTROL, + SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)}, + {EMAC_QSERDES_RX_CDR_CONTROL2, + SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)}, +}; + +static const struct emac_reg_write tx_rx_setting[] = { + {EMAC_QSERDES_TX_BIST_MODE_LANENO, 0}, + {EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)}, + {EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN}, + {EMAC_QSERDES_TX_TX_EMP_POST1_LVL, + TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)}, + {EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)}, + {EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)}, +}; + +static const struct emac_reg_write sgmii_v2_laned[] = { + /* CDR Settings */ + {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0, + UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)}, + {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)}, + {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)}, + + /* TX/RX Settings */ + {EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2}, + + {EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE}, + {EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN}, + {EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)}, + {EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX}, + {EMAC_SGMII_LN_TX_POST, TX_POST_MUX}, + + {EMAC_SGMII_LN_CML_CTRL_MODE0, + CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)}, + {EMAC_SGMII_LN_MIXER_CTRL_MODE0, + MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)}, + {EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)}, + {EMAC_SGMII_LN_SIGDET_ENABLES, + SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP}, + {EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)}, + + {EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)}, + {EMAC_SGMII_LN_RX_MISC_CNTRL0, 0}, + {EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV, + DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)}, + + {EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)}, + {EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)}, + {EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)}, + {EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)}, + {EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)}, + {EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL}, +}; + +static const struct emac_reg_write physical_coding_sublayer_programming_v2[] = { + {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B}, + {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)}, + {EMAC_SGMII_PHY_TX_PWR_CTRL, 0}, + {EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE}, +}; + +static int emac_sgmii_link_init(struct emac_adapter *adpt) +{ + struct phy_device *phydev = adpt->phydev; + struct emac_phy *phy = &adpt->phy; + u32 val; + + val = readl(phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + + if (phydev->autoneg == AUTONEG_ENABLE) { + val &= ~(FORCE_AN_RX_CFG | FORCE_AN_TX_CFG); + val |= AN_ENABLE; + writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + } else { + u32 speed_cfg; + + switch (phydev->speed) { + case SPEED_10: + speed_cfg = SPDMODE_10; + break; + case SPEED_100: + speed_cfg = SPDMODE_100; + break; + case SPEED_1000: + speed_cfg = SPDMODE_1000; + break; + default: + return -EINVAL; + } + + if (phydev->duplex == DUPLEX_FULL) + speed_cfg |= DUPLEX_MODE; + + val &= ~AN_ENABLE; + writel(speed_cfg, phy->base + EMAC_SGMII_PHY_SPEED_CFG1); + writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + } + + return 0; +} + +static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits) +{ + struct emac_phy *phy = &adpt->phy; + u32 status; + + writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); + writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD); + /* Ensure interrupt clear command is written to HW */ + wmb(); + + /* After set the IRQ_GLOBAL_CLEAR bit, the status clearing must + * be confirmed before clearing the bits in other registers. + * It takes a few cycles for hw to clear the interrupt status. + */ + if (readl_poll_timeout_atomic(phy->base + + EMAC_SGMII_PHY_INTERRUPT_STATUS, + status, !(status & irq_bits), 1, + SGMII_PHY_IRQ_CLR_WAIT_TIME)) { + netdev_err(adpt->netdev, + "error: failed clear SGMII irq: status:0x%x bits:0x%x\n", + status, irq_bits); + return -EIO; + } + + /* Finalize clearing procedure */ + writel_relaxed(0, phy->base + EMAC_SGMII_PHY_IRQ_CMD); + writel_relaxed(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); + + /* Ensure that clearing procedure finalization is written to HW */ + wmb(); + + return 0; +} + +int emac_sgmii_init_v1(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + unsigned int i; + int ret; + + ret = emac_sgmii_link_init(adpt); + if (ret) + return ret; + + emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1, + ARRAY_SIZE(physical_coding_sublayer_programming_v1)); + emac_reg_write_all(phy->base, sysclk_refclk_setting, + ARRAY_SIZE(sysclk_refclk_setting)); + emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting)); + emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting)); + emac_reg_write_all(phy->base, tx_rx_setting, + ARRAY_SIZE(tx_rx_setting)); + + /* Power up the Ser/Des engine */ + writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START); + + for (i = 0; i < SERDES_START_WAIT_TIMES; i++) { + if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY) + break; + usleep_range(100, 200); + } + + if (i == SERDES_START_WAIT_TIMES) { + netdev_err(adpt->netdev, "error: ser/des failed to start\n"); + return -EIO; + } + /* Mask out all the SGMII Interrupt */ + writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK); + + emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR); + + return 0; +} + +int emac_sgmii_init_v2(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + void __iomem *phy_regs = phy->base; + void __iomem *laned = phy->digital; + unsigned int i; + u32 lnstatus; + int ret; + + ret = emac_sgmii_link_init(adpt); + if (ret) + return ret; + + /* PCS lane-x init */ + emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v2, + ARRAY_SIZE(physical_coding_sublayer_programming_v2)); + + /* SGMII lane-x init */ + emac_reg_write_all(phy->digital, + sgmii_v2_laned, ARRAY_SIZE(sgmii_v2_laned)); + + /* Power up PCS and start reset lane state machine */ + + writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL); + writel(1, laned + SGMII_LN_RSM_START); + + /* Wait for c_ready assertion */ + for (i = 0; i < SERDES_START_WAIT_TIMES; i++) { + lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS); + if (lnstatus & BIT(1)) + break; + usleep_range(100, 200); + } + + if (i == SERDES_START_WAIT_TIMES) { + netdev_err(adpt->netdev, "SGMII failed to start\n"); + return -EIO; + } + + /* Disable digital and SERDES loopback */ + writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0); + writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2); + writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1); + + /* Mask out all the SGMII Interrupt */ + writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK); + + emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR); + + return 0; +} + +static void emac_sgmii_reset_prepare(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + u32 val; + + /* Reset PHY */ + val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2); + writel(((val & ~PHY_RESET) | PHY_RESET), phy->base + + EMAC_EMAC_WRAPPER_CSR2); + /* Ensure phy-reset command is written to HW before the release cmd */ + msleep(50); + val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2); + writel((val & ~PHY_RESET), phy->base + EMAC_EMAC_WRAPPER_CSR2); + /* Ensure phy-reset release command is written to HW before initializing + * SGMII + */ + msleep(50); +} + +void emac_sgmii_reset(struct emac_adapter *adpt) +{ + int ret; + + clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); + emac_sgmii_reset_prepare(adpt); + + ret = adpt->phy.initialize(adpt); + if (ret) + netdev_err(adpt->netdev, + "could not reinitialize internal PHY (error=%i)\n", + ret); + + clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); +} + +static const struct of_device_id emac_sgmii_dt_match[] = { + { + .compatible = "qcom,fsm9900-emac-sgmii", + .data = emac_sgmii_init_v1, + }, + { + .compatible = "qcom,qdf2432-emac-sgmii", + .data = emac_sgmii_init_v2, + }, + {} +}; + +int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) +{ + struct platform_device *sgmii_pdev = NULL; + struct emac_phy *phy = &adpt->phy; + struct resource *res; + const struct of_device_id *match; + struct device_node *np; + + np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); + if (!np) { + dev_err(&pdev->dev, "missing internal-phy property\n"); + return -ENODEV; + } + + sgmii_pdev = of_find_device_by_node(np); + if (!sgmii_pdev) { + dev_err(&pdev->dev, "invalid internal-phy property\n"); + return -ENODEV; + } + + match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); + if (!match) { + dev_err(&pdev->dev, "unrecognized internal phy node\n"); + return -ENODEV; + } + + phy->initialize = (emac_sgmii_initialize)match->data; + + /* Base address is the first address */ + res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); + phy->base = devm_ioremap_resource(&sgmii_pdev->dev, res); + if (IS_ERR(phy->base)) + return PTR_ERR(phy->base); + + /* v2 SGMII has a per-lane digital digital, so parse it if it exists */ + res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1); + if (res) { + phy->digital = devm_ioremap_resource(&sgmii_pdev->dev, res); + if (IS_ERR(phy->base)) + return PTR_ERR(phy->base); + + } + + return phy->initialize(adpt); +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h new file mode 100644 index 000000000000..ce79212ff403 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _EMAC_SGMII_H_ +#define _EMAC_SGMII_H_ + +struct emac_adapter; +struct platform_device; + +int emac_sgmii_init_v1(struct emac_adapter *adpt); +int emac_sgmii_init_v2(struct emac_adapter *adpt); +int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt); +void emac_sgmii_reset(struct emac_adapter *adpt); + +#endif diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c new file mode 100644 index 000000000000..56e0a9f1c5ec --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -0,0 +1,743 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC Gigabit Ethernet Driver */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-phy.h" +#include "emac-sgmii.h" + +#define EMAC_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP) + +#define EMAC_RRD_SIZE 4 +/* The RRD size if timestamping is enabled: */ +#define EMAC_TS_RRD_SIZE 6 +#define EMAC_TPD_SIZE 4 +#define EMAC_RFD_SIZE 2 + +#define REG_MAC_RX_STATUS_BIN EMAC_RXMAC_STATC_REG0 +#define REG_MAC_RX_STATUS_END EMAC_RXMAC_STATC_REG22 +#define REG_MAC_TX_STATUS_BIN EMAC_TXMAC_STATC_REG0 +#define REG_MAC_TX_STATUS_END EMAC_TXMAC_STATC_REG24 + +#define RXQ0_NUM_RFD_PREF_DEF 8 +#define TXQ0_NUM_TPD_PREF_DEF 5 + +#define EMAC_PREAMBLE_DEF 7 + +#define DMAR_DLY_CNT_DEF 15 +#define DMAW_DLY_CNT_DEF 4 + +#define IMR_NORMAL_MASK (\ + ISR_ERROR |\ + ISR_GPHY_LINK |\ + ISR_TX_PKT |\ + GPHY_WAKEUP_INT) + +#define IMR_EXTENDED_MASK (\ + SW_MAN_INT |\ + ISR_OVER |\ + ISR_ERROR |\ + ISR_GPHY_LINK |\ + ISR_TX_PKT |\ + GPHY_WAKEUP_INT) + +#define ISR_TX_PKT (\ + TX_PKT_INT |\ + TX_PKT_INT1 |\ + TX_PKT_INT2 |\ + TX_PKT_INT3) + +#define ISR_GPHY_LINK (\ + GPHY_LINK_UP_INT |\ + GPHY_LINK_DOWN_INT) + +#define ISR_OVER (\ + RFD0_UR_INT |\ + RFD1_UR_INT |\ + RFD2_UR_INT |\ + RFD3_UR_INT |\ + RFD4_UR_INT |\ + RXF_OF_INT |\ + TXF_UR_INT) + +#define ISR_ERROR (\ + DMAR_TO_INT |\ + DMAW_TO_INT |\ + TXQ_TO_INT) + +/* in sync with enum emac_clk_id */ +static const char * const emac_clk_name[] = { + "axi_clk", "cfg_ahb_clk", "high_speed_clk", "mdio_clk", "tx_clk", + "rx_clk", "sys_clk" +}; + +void emac_reg_update32(void __iomem *addr, u32 mask, u32 val) +{ + u32 data = readl(addr); + + writel(((data & ~mask) | val), addr); +} + +/* reinitialize */ +int emac_reinit_locked(struct emac_adapter *adpt) +{ + int ret; + + mutex_lock(&adpt->reset_lock); + + emac_mac_down(adpt); + emac_sgmii_reset(adpt); + ret = emac_mac_up(adpt); + + mutex_unlock(&adpt->reset_lock); + + return ret; +} + +/* NAPI */ +static int emac_napi_rtx(struct napi_struct *napi, int budget) +{ + struct emac_rx_queue *rx_q = + container_of(napi, struct emac_rx_queue, napi); + struct emac_adapter *adpt = netdev_priv(rx_q->netdev); + struct emac_irq *irq = rx_q->irq; + int work_done = 0; + + emac_mac_rx_process(adpt, rx_q, &work_done, budget); + + if (work_done < budget) { + napi_complete(napi); + + irq->mask |= rx_q->intr; + writel(irq->mask, adpt->base + EMAC_INT_MASK); + } + + return work_done; +} + +/* Transmit the packet */ +static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb); +} + +irqreturn_t emac_isr(int _irq, void *data) +{ + struct emac_irq *irq = data; + struct emac_adapter *adpt = + container_of(irq, struct emac_adapter, irq); + struct emac_rx_queue *rx_q = &adpt->rx_q; + u32 isr, status; + + /* disable the interrupt */ + writel(0, adpt->base + EMAC_INT_MASK); + + isr = readl_relaxed(adpt->base + EMAC_INT_STATUS); + + status = isr & irq->mask; + if (status == 0) + goto exit; + + if (status & ISR_ERROR) { + netif_warn(adpt, intr, adpt->netdev, + "warning: error irq status 0x%lx\n", + status & ISR_ERROR); + /* reset MAC */ + schedule_work(&adpt->work_thread); + } + + /* Schedule the napi for receive queue with interrupt + * status bit set + */ + if (status & rx_q->intr) { + if (napi_schedule_prep(&rx_q->napi)) { + irq->mask &= ~rx_q->intr; + __napi_schedule(&rx_q->napi); + } + } + + if (status & TX_PKT_INT) + emac_mac_tx_process(adpt, &adpt->tx_q); + + if (status & ISR_OVER) + net_warn_ratelimited("warning: TX/RX overflow\n"); + + /* link event */ + if (status & ISR_GPHY_LINK) + phy_mac_interrupt(adpt->phydev, !!(status & GPHY_LINK_UP_INT)); + +exit: + /* enable the interrupt */ + writel(irq->mask, adpt->base + EMAC_INT_MASK); + + return IRQ_HANDLED; +} + +/* Configure VLAN tag strip/insert feature */ +static int emac_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = features ^ netdev->features; + struct emac_adapter *adpt = netdev_priv(netdev); + + /* We only need to reprogram the hardware if the VLAN tag features + * have changed, and if it's already running. + */ + if (!(changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX))) + return 0; + + if (!netif_running(netdev)) + return 0; + + /* emac_mac_mode_config() uses netdev->features to configure the EMAC, + * so make sure it's set first. + */ + netdev->features = features; + + return emac_reinit_locked(adpt); +} + +/* Configure Multicast and Promiscuous modes */ +static void emac_rx_mode_set(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + struct netdev_hw_addr *ha; + + emac_mac_mode_config(adpt); + + /* update multicast address filtering */ + emac_mac_multicast_addr_clear(adpt); + netdev_for_each_mc_addr(ha, netdev) + emac_mac_multicast_addr_set(adpt, ha->addr); +} + +/* Change the Maximum Transfer Unit (MTU) */ +static int emac_change_mtu(struct net_device *netdev, int new_mtu) +{ + unsigned int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct emac_adapter *adpt = netdev_priv(netdev); + + if ((max_frame < EMAC_MIN_ETH_FRAME_SIZE) || + (max_frame > EMAC_MAX_ETH_FRAME_SIZE)) { + netdev_err(adpt->netdev, "error: invalid MTU setting\n"); + return -EINVAL; + } + + netif_info(adpt, hw, adpt->netdev, + "changing MTU from %d to %d\n", netdev->mtu, + new_mtu); + netdev->mtu = new_mtu; + + if (netif_running(netdev)) + return emac_reinit_locked(adpt); + + return 0; +} + +/* Called when the network interface is made active */ +static int emac_open(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + int ret; + + /* allocate rx/tx dma buffer & descriptors */ + ret = emac_mac_rx_tx_rings_alloc_all(adpt); + if (ret) { + netdev_err(adpt->netdev, "error allocating rx/tx rings\n"); + return ret; + } + + ret = emac_mac_up(adpt); + if (ret) { + emac_mac_rx_tx_rings_free_all(adpt); + return ret; + } + + emac_mac_start(adpt); + + return 0; +} + +/* Called when the network interface is disabled */ +static int emac_close(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + mutex_lock(&adpt->reset_lock); + + emac_mac_down(adpt); + emac_mac_rx_tx_rings_free_all(adpt); + + mutex_unlock(&adpt->reset_lock); + + return 0; +} + +/* Respond to a TX hang */ +static void emac_tx_timeout(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + schedule_work(&adpt->work_thread); +} + +/* IOCTL support for the interface */ +static int emac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + if (!netif_running(netdev)) + return -EINVAL; + + if (!netdev->phydev) + return -ENODEV; + + return phy_mii_ioctl(netdev->phydev, ifr, cmd); +} + +/* Provide network statistics info for the interface */ +static struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *net_stats) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + unsigned int addr = REG_MAC_RX_STATUS_BIN; + struct emac_stats *stats = &adpt->stats; + u64 *stats_itr = &adpt->stats.rx_ok; + u32 val; + + spin_lock(&stats->lock); + + while (addr <= REG_MAC_RX_STATUS_END) { + val = readl_relaxed(adpt->base + addr); + *stats_itr += val; + stats_itr++; + addr += sizeof(u32); + } + + /* additional rx status */ + val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG23); + adpt->stats.rx_crc_align += val; + val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG24); + adpt->stats.rx_jabbers += val; + + /* update tx status */ + addr = REG_MAC_TX_STATUS_BIN; + stats_itr = &adpt->stats.tx_ok; + + while (addr <= REG_MAC_TX_STATUS_END) { + val = readl_relaxed(adpt->base + addr); + *stats_itr += val; + ++stats_itr; + addr += sizeof(u32); + } + + /* additional tx status */ + val = readl_relaxed(adpt->base + EMAC_TXMAC_STATC_REG25); + adpt->stats.tx_col += val; + + /* return parsed statistics */ + net_stats->rx_packets = stats->rx_ok; + net_stats->tx_packets = stats->tx_ok; + net_stats->rx_bytes = stats->rx_byte_cnt; + net_stats->tx_bytes = stats->tx_byte_cnt; + net_stats->multicast = stats->rx_mcast; + net_stats->collisions = stats->tx_1_col + stats->tx_2_col * 2 + + stats->tx_late_col + stats->tx_abort_col; + + net_stats->rx_errors = stats->rx_frag + stats->rx_fcs_err + + stats->rx_len_err + stats->rx_sz_ov + + stats->rx_align_err; + net_stats->rx_fifo_errors = stats->rx_rxf_ov; + net_stats->rx_length_errors = stats->rx_len_err; + net_stats->rx_crc_errors = stats->rx_fcs_err; + net_stats->rx_frame_errors = stats->rx_align_err; + net_stats->rx_over_errors = stats->rx_rxf_ov; + net_stats->rx_missed_errors = stats->rx_rxf_ov; + + net_stats->tx_errors = stats->tx_late_col + stats->tx_abort_col + + stats->tx_underrun + stats->tx_trunc; + net_stats->tx_fifo_errors = stats->tx_underrun; + net_stats->tx_aborted_errors = stats->tx_abort_col; + net_stats->tx_window_errors = stats->tx_late_col; + + spin_unlock(&stats->lock); + + return net_stats; +} + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_close, + .ndo_validate_addr = eth_validate_addr, + .ndo_start_xmit = emac_start_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_change_mtu = emac_change_mtu, + .ndo_do_ioctl = emac_ioctl, + .ndo_tx_timeout = emac_tx_timeout, + .ndo_get_stats64 = emac_get_stats64, + .ndo_set_features = emac_set_features, + .ndo_set_rx_mode = emac_rx_mode_set, +}; + +/* Watchdog task routine, called to reinitialize the EMAC */ +static void emac_work_thread(struct work_struct *work) +{ + struct emac_adapter *adpt = + container_of(work, struct emac_adapter, work_thread); + + emac_reinit_locked(adpt); +} + +/* Initialize various data structures */ +static void emac_init_adapter(struct emac_adapter *adpt) +{ + u32 reg; + + /* descriptors */ + adpt->tx_desc_cnt = EMAC_DEF_TX_DESCS; + adpt->rx_desc_cnt = EMAC_DEF_RX_DESCS; + + /* dma */ + adpt->dma_order = emac_dma_ord_out; + adpt->dmar_block = emac_dma_req_4096; + adpt->dmaw_block = emac_dma_req_128; + adpt->dmar_dly_cnt = DMAR_DLY_CNT_DEF; + adpt->dmaw_dly_cnt = DMAW_DLY_CNT_DEF; + adpt->tpd_burst = TXQ0_NUM_TPD_PREF_DEF; + adpt->rfd_burst = RXQ0_NUM_RFD_PREF_DEF; + + /* irq moderator */ + reg = ((EMAC_DEF_RX_IRQ_MOD >> 1) << IRQ_MODERATOR2_INIT_SHFT) | + ((EMAC_DEF_TX_IRQ_MOD >> 1) << IRQ_MODERATOR_INIT_SHFT); + adpt->irq_mod = reg; + + /* others */ + adpt->preamble = EMAC_PREAMBLE_DEF; +} + +/* Get the clock */ +static int emac_clks_get(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + unsigned int i; + + for (i = 0; i < EMAC_CLK_CNT; i++) { + struct clk *clk = devm_clk_get(&pdev->dev, emac_clk_name[i]); + + if (IS_ERR(clk)) { + dev_err(&pdev->dev, + "could not claim clock %s (error=%li)\n", + emac_clk_name[i], PTR_ERR(clk)); + + return PTR_ERR(clk); + } + + adpt->clk[i] = clk; + } + + return 0; +} + +/* Initialize clocks */ +static int emac_clks_phase1_init(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + int ret; + + ret = emac_clks_get(pdev, adpt); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_AXI]); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); + if (ret) + return ret; + + return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); +} + +/* Enable clocks; needs emac_clks_phase1_init to be called before */ +static int emac_clks_phase2_init(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + int ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_TX], 125000000); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_TX]); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_MDIO], 25000000); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_MDIO]); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_RX]); + if (ret) + return ret; + + return clk_prepare_enable(adpt->clk[EMAC_CLK_SYS]); +} + +static void emac_clks_teardown(struct emac_adapter *adpt) +{ + + unsigned int i; + + for (i = 0; i < EMAC_CLK_CNT; i++) + clk_disable_unprepare(adpt->clk[i]); +} + +/* Get the resources */ +static int emac_probe_resources(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + struct device_node *node = pdev->dev.of_node; + struct net_device *netdev = adpt->netdev; + struct resource *res; + const void *maddr; + int ret = 0; + + /* get mac address */ + maddr = of_get_mac_address(node); + if (!maddr) + eth_hw_addr_random(netdev); + else + ether_addr_copy(netdev->dev_addr, maddr); + + /* Core 0 interrupt */ + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_err(&pdev->dev, + "error: missing core0 irq resource (error=%i)\n", ret); + return ret; + } + adpt->irq.irq = ret; + + /* base register address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + adpt->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(adpt->base)) + return PTR_ERR(adpt->base); + + /* CSR register address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + adpt->csr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(adpt->csr)) + return PTR_ERR(adpt->csr); + + netdev->base_addr = (unsigned long)adpt->base; + + return 0; +} + +static const struct of_device_id emac_dt_match[] = { + { + .compatible = "qcom,fsm9900-emac", + }, + {} +}; + +static int emac_probe(struct platform_device *pdev) +{ + struct net_device *netdev; + struct emac_adapter *adpt; + struct emac_phy *phy; + u16 devid, revid; + u32 reg; + int ret; + + /* The EMAC itself is capable of 64-bit DMA, so try that first. */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + /* Some platforms may restrict the EMAC's address bus to less + * then the size of DDR. In this case, we need to try a + * smaller mask. We could try every possible smaller mask, + * but that's overkill. Instead, just fall to 32-bit, which + * should always work. + */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "could not set DMA mask\n"); + return ret; + } + } + + netdev = alloc_etherdev(sizeof(struct emac_adapter)); + if (!netdev) + return -ENOMEM; + + dev_set_drvdata(&pdev->dev, netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + adpt = netdev_priv(netdev); + adpt->netdev = netdev; + adpt->msg_enable = EMAC_MSG_DEFAULT; + + phy = &adpt->phy; + + mutex_init(&adpt->reset_lock); + spin_lock_init(&adpt->stats.lock); + + adpt->irq.mask = RX_PKT_INT0 | IMR_NORMAL_MASK; + + ret = emac_probe_resources(pdev, adpt); + if (ret) + goto err_undo_netdev; + + /* initialize clocks */ + ret = emac_clks_phase1_init(pdev, adpt); + if (ret) { + dev_err(&pdev->dev, "could not initialize clocks\n"); + goto err_undo_netdev; + } + + netdev->watchdog_timeo = EMAC_WATCHDOG_TIME; + netdev->irq = adpt->irq.irq; + + adpt->rrd_size = EMAC_RRD_SIZE; + adpt->tpd_size = EMAC_TPD_SIZE; + adpt->rfd_size = EMAC_RFD_SIZE; + + netdev->netdev_ops = &emac_netdev_ops; + + emac_init_adapter(adpt); + + /* init external phy */ + ret = emac_phy_config(pdev, adpt); + if (ret) + goto err_undo_clocks; + + /* init internal sgmii phy */ + ret = emac_sgmii_config(pdev, adpt); + if (ret) + goto err_undo_mdiobus; + + /* enable clocks */ + ret = emac_clks_phase2_init(pdev, adpt); + if (ret) { + dev_err(&pdev->dev, "could not initialize clocks\n"); + goto err_undo_mdiobus; + } + + emac_mac_reset(adpt); + + /* set hw features */ + netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features = netdev->features; + + netdev->vlan_features |= NETIF_F_SG | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; + + INIT_WORK(&adpt->work_thread, emac_work_thread); + + /* Initialize queues */ + emac_mac_rx_tx_ring_init_all(pdev, adpt); + + netif_napi_add(netdev, &adpt->rx_q.napi, emac_napi_rtx, + NAPI_POLL_WEIGHT); + + ret = register_netdev(netdev); + if (ret) { + dev_err(&pdev->dev, "could not register net device\n"); + goto err_undo_napi; + } + + reg = readl_relaxed(adpt->base + EMAC_DMA_MAS_CTRL); + devid = (reg & DEV_ID_NUM_BMSK) >> DEV_ID_NUM_SHFT; + revid = (reg & DEV_REV_NUM_BMSK) >> DEV_REV_NUM_SHFT; + reg = readl_relaxed(adpt->base + EMAC_CORE_HW_VERSION); + + netif_info(adpt, probe, netdev, + "hardware id %d.%d, hardware version %d.%d.%d\n", + devid, revid, + (reg & MAJOR_BMSK) >> MAJOR_SHFT, + (reg & MINOR_BMSK) >> MINOR_SHFT, + (reg & STEP_BMSK) >> STEP_SHFT); + + return 0; + +err_undo_napi: + netif_napi_del(&adpt->rx_q.napi); +err_undo_mdiobus: + mdiobus_unregister(adpt->mii_bus); +err_undo_clocks: + emac_clks_teardown(adpt); +err_undo_netdev: + free_netdev(netdev); + + return ret; +} + +static int emac_remove(struct platform_device *pdev) +{ + struct net_device *netdev = dev_get_drvdata(&pdev->dev); + struct emac_adapter *adpt = netdev_priv(netdev); + + unregister_netdev(netdev); + netif_napi_del(&adpt->rx_q.napi); + + emac_clks_teardown(adpt); + + mdiobus_unregister(adpt->mii_bus); + free_netdev(netdev); + dev_set_drvdata(&pdev->dev, NULL); + + return 0; +} + +static struct platform_driver emac_platform_driver = { + .probe = emac_probe, + .remove = emac_remove, + .driver = { + .owner = THIS_MODULE, + .name = "qcom-emac", + .of_match_table = emac_dt_match, + }, +}; + +module_platform_driver(emac_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:qcom-emac"); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h new file mode 100644 index 000000000000..0c76e6cb8c9e --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac.h @@ -0,0 +1,335 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _EMAC_H_ +#define _EMAC_H_ + +#include +#include +#include +#include +#include "emac-mac.h" +#include "emac-phy.h" + +/* EMAC base register offsets */ +#define EMAC_DMA_MAS_CTRL 0x001400 +#define EMAC_IRQ_MOD_TIM_INIT 0x001408 +#define EMAC_BLK_IDLE_STS 0x00140c +#define EMAC_PHY_LINK_DELAY 0x00141c +#define EMAC_SYS_ALIV_CTRL 0x001434 +#define EMAC_MAC_IPGIFG_CTRL 0x001484 +#define EMAC_MAC_STA_ADDR0 0x001488 +#define EMAC_MAC_STA_ADDR1 0x00148c +#define EMAC_HASH_TAB_REG0 0x001490 +#define EMAC_HASH_TAB_REG1 0x001494 +#define EMAC_MAC_HALF_DPLX_CTRL 0x001498 +#define EMAC_MAX_FRAM_LEN_CTRL 0x00149c +#define EMAC_INT_STATUS 0x001600 +#define EMAC_INT_MASK 0x001604 +#define EMAC_RXMAC_STATC_REG0 0x001700 +#define EMAC_RXMAC_STATC_REG22 0x001758 +#define EMAC_TXMAC_STATC_REG0 0x001760 +#define EMAC_TXMAC_STATC_REG24 0x0017c0 +#define EMAC_CORE_HW_VERSION 0x001974 +#define EMAC_IDT_TABLE0 0x001b00 +#define EMAC_RXMAC_STATC_REG23 0x001bc8 +#define EMAC_RXMAC_STATC_REG24 0x001bcc +#define EMAC_TXMAC_STATC_REG25 0x001bd0 +#define EMAC_INT1_MASK 0x001bf0 +#define EMAC_INT1_STATUS 0x001bf4 +#define EMAC_INT2_MASK 0x001bf8 +#define EMAC_INT2_STATUS 0x001bfc +#define EMAC_INT3_MASK 0x001c00 +#define EMAC_INT3_STATUS 0x001c04 + +/* EMAC_DMA_MAS_CTRL */ +#define DEV_ID_NUM_BMSK 0x7f000000 +#define DEV_ID_NUM_SHFT 24 +#define DEV_REV_NUM_BMSK 0xff0000 +#define DEV_REV_NUM_SHFT 16 +#define INT_RD_CLR_EN 0x4000 +#define IRQ_MODERATOR2_EN 0x800 +#define IRQ_MODERATOR_EN 0x400 +#define LPW_CLK_SEL 0x80 +#define LPW_STATE 0x20 +#define LPW_MODE 0x10 +#define SOFT_RST 0x1 + +/* EMAC_IRQ_MOD_TIM_INIT */ +#define IRQ_MODERATOR2_INIT_BMSK 0xffff0000 +#define IRQ_MODERATOR2_INIT_SHFT 16 +#define IRQ_MODERATOR_INIT_BMSK 0xffff +#define IRQ_MODERATOR_INIT_SHFT 0 + +/* EMAC_INT_STATUS */ +#define DIS_INT BIT(31) +#define PTP_INT BIT(30) +#define RFD4_UR_INT BIT(29) +#define TX_PKT_INT3 BIT(26) +#define TX_PKT_INT2 BIT(25) +#define TX_PKT_INT1 BIT(24) +#define RX_PKT_INT3 BIT(19) +#define RX_PKT_INT2 BIT(18) +#define RX_PKT_INT1 BIT(17) +#define RX_PKT_INT0 BIT(16) +#define TX_PKT_INT BIT(15) +#define TXQ_TO_INT BIT(14) +#define GPHY_WAKEUP_INT BIT(13) +#define GPHY_LINK_DOWN_INT BIT(12) +#define GPHY_LINK_UP_INT BIT(11) +#define DMAW_TO_INT BIT(10) +#define DMAR_TO_INT BIT(9) +#define TXF_UR_INT BIT(8) +#define RFD3_UR_INT BIT(7) +#define RFD2_UR_INT BIT(6) +#define RFD1_UR_INT BIT(5) +#define RFD0_UR_INT BIT(4) +#define RXF_OF_INT BIT(3) +#define SW_MAN_INT BIT(2) + +/* EMAC_MAILBOX_6 */ +#define RFD2_PROC_IDX_BMSK 0xfff0000 +#define RFD2_PROC_IDX_SHFT 16 +#define RFD2_PROD_IDX_BMSK 0xfff +#define RFD2_PROD_IDX_SHFT 0 + +/* EMAC_CORE_HW_VERSION */ +#define MAJOR_BMSK 0xf0000000 +#define MAJOR_SHFT 28 +#define MINOR_BMSK 0xfff0000 +#define MINOR_SHFT 16 +#define STEP_BMSK 0xffff +#define STEP_SHFT 0 + +/* EMAC_EMAC_WRAPPER_CSR1 */ +#define TX_INDX_FIFO_SYNC_RST BIT(23) +#define TX_TS_FIFO_SYNC_RST BIT(22) +#define RX_TS_FIFO2_SYNC_RST BIT(21) +#define RX_TS_FIFO1_SYNC_RST BIT(20) +#define TX_TS_ENABLE BIT(16) +#define DIS_1588_CLKS BIT(11) +#define FREQ_MODE BIT(9) +#define ENABLE_RRD_TIMESTAMP BIT(3) + +/* EMAC_EMAC_WRAPPER_CSR2 */ +#define HDRIVE_BMSK 0x3000 +#define HDRIVE_SHFT 12 +#define SLB_EN BIT(9) +#define PLB_EN BIT(8) +#define WOL_EN BIT(3) +#define PHY_RESET BIT(0) + +#define EMAC_DEV_ID 0x0040 + +/* SGMII v2 per lane registers */ +#define SGMII_LN_RSM_START 0x029C + +/* SGMII v2 PHY common registers */ +#define SGMII_PHY_CMN_CTRL 0x0408 +#define SGMII_PHY_CMN_RESET_CTRL 0x0410 + +/* SGMII v2 PHY registers per lane */ +#define SGMII_PHY_LN_OFFSET 0x0400 +#define SGMII_PHY_LN_LANE_STATUS 0x00DC +#define SGMII_PHY_LN_BIST_GEN0 0x008C +#define SGMII_PHY_LN_BIST_GEN1 0x0090 +#define SGMII_PHY_LN_BIST_GEN2 0x0094 +#define SGMII_PHY_LN_BIST_GEN3 0x0098 +#define SGMII_PHY_LN_CDR_CTRL1 0x005C + +enum emac_clk_id { + EMAC_CLK_AXI, + EMAC_CLK_CFG_AHB, + EMAC_CLK_HIGH_SPEED, + EMAC_CLK_MDIO, + EMAC_CLK_TX, + EMAC_CLK_RX, + EMAC_CLK_SYS, + EMAC_CLK_CNT +}; + +#define EMAC_LINK_SPEED_UNKNOWN 0x0 +#define EMAC_LINK_SPEED_10_HALF BIT(0) +#define EMAC_LINK_SPEED_10_FULL BIT(1) +#define EMAC_LINK_SPEED_100_HALF BIT(2) +#define EMAC_LINK_SPEED_100_FULL BIT(3) +#define EMAC_LINK_SPEED_1GB_FULL BIT(5) + +#define EMAC_MAX_SETUP_LNK_CYCLE 100 + +/* Wake On Lan */ +#define EMAC_WOL_PHY 0x00000001 /* PHY Status Change */ +#define EMAC_WOL_MAGIC 0x00000002 /* Magic Packet */ + +struct emac_stats { + /* rx */ + u64 rx_ok; /* good packets */ + u64 rx_bcast; /* good broadcast packets */ + u64 rx_mcast; /* good multicast packets */ + u64 rx_pause; /* pause packet */ + u64 rx_ctrl; /* control packets other than pause frame. */ + u64 rx_fcs_err; /* packets with bad FCS. */ + u64 rx_len_err; /* packets with length mismatch */ + u64 rx_byte_cnt; /* good bytes count (without FCS) */ + u64 rx_runt; /* runt packets */ + u64 rx_frag; /* fragment count */ + u64 rx_sz_64; /* packets that are 64 bytes */ + u64 rx_sz_65_127; /* packets that are 65-127 bytes */ + u64 rx_sz_128_255; /* packets that are 128-255 bytes */ + u64 rx_sz_256_511; /* packets that are 256-511 bytes */ + u64 rx_sz_512_1023; /* packets that are 512-1023 bytes */ + u64 rx_sz_1024_1518; /* packets that are 1024-1518 bytes */ + u64 rx_sz_1519_max; /* packets that are 1519-MTU bytes*/ + u64 rx_sz_ov; /* packets that are >MTU bytes (truncated) */ + u64 rx_rxf_ov; /* packets dropped due to RX FIFO overflow */ + u64 rx_align_err; /* alignment errors */ + u64 rx_bcast_byte_cnt; /* broadcast packets byte count (without FCS) */ + u64 rx_mcast_byte_cnt; /* multicast packets byte count (without FCS) */ + u64 rx_err_addr; /* packets dropped due to address filtering */ + u64 rx_crc_align; /* CRC align errors */ + u64 rx_jabbers; /* jabbers */ + + /* tx */ + u64 tx_ok; /* good packets */ + u64 tx_bcast; /* good broadcast packets */ + u64 tx_mcast; /* good multicast packets */ + u64 tx_pause; /* pause packets */ + u64 tx_exc_defer; /* packets with excessive deferral */ + u64 tx_ctrl; /* control packets other than pause frame */ + u64 tx_defer; /* packets that are deferred. */ + u64 tx_byte_cnt; /* good bytes count (without FCS) */ + u64 tx_sz_64; /* packets that are 64 bytes */ + u64 tx_sz_65_127; /* packets that are 65-127 bytes */ + u64 tx_sz_128_255; /* packets that are 128-255 bytes */ + u64 tx_sz_256_511; /* packets that are 256-511 bytes */ + u64 tx_sz_512_1023; /* packets that are 512-1023 bytes */ + u64 tx_sz_1024_1518; /* packets that are 1024-1518 bytes */ + u64 tx_sz_1519_max; /* packets that are 1519-MTU bytes */ + u64 tx_1_col; /* packets single prior collision */ + u64 tx_2_col; /* packets with multiple prior collisions */ + u64 tx_late_col; /* packets with late collisions */ + u64 tx_abort_col; /* packets aborted due to excess collisions */ + u64 tx_underrun; /* packets aborted due to FIFO underrun */ + u64 tx_rd_eop; /* count of reads beyond EOP */ + u64 tx_len_err; /* packets with length mismatch */ + u64 tx_trunc; /* packets truncated due to size >MTU */ + u64 tx_bcast_byte; /* broadcast packets byte count (without FCS) */ + u64 tx_mcast_byte; /* multicast packets byte count (without FCS) */ + u64 tx_col; /* collisions */ + + spinlock_t lock; /* prevent multiple simultaneous readers */ +}; + +/* RSS hstype Definitions */ +#define EMAC_RSS_HSTYP_IPV4_EN 0x00000001 +#define EMAC_RSS_HSTYP_TCP4_EN 0x00000002 +#define EMAC_RSS_HSTYP_IPV6_EN 0x00000004 +#define EMAC_RSS_HSTYP_TCP6_EN 0x00000008 +#define EMAC_RSS_HSTYP_ALL_EN (\ + EMAC_RSS_HSTYP_IPV4_EN |\ + EMAC_RSS_HSTYP_TCP4_EN |\ + EMAC_RSS_HSTYP_IPV6_EN |\ + EMAC_RSS_HSTYP_TCP6_EN) + +#define EMAC_VLAN_TO_TAG(_vlan, _tag) \ + (_tag = ((((_vlan) >> 8) & 0xFF) | (((_vlan) & 0xFF) << 8))) + +#define EMAC_TAG_TO_VLAN(_tag, _vlan) \ + (_vlan = ((((_tag) >> 8) & 0xFF) | (((_tag) & 0xFF) << 8))) + +#define EMAC_DEF_RX_BUF_SIZE 1536 +#define EMAC_MAX_JUMBO_PKT_SIZE (9 * 1024) +#define EMAC_MAX_TX_OFFLOAD_THRESH (9 * 1024) + +#define EMAC_MAX_ETH_FRAME_SIZE EMAC_MAX_JUMBO_PKT_SIZE +#define EMAC_MIN_ETH_FRAME_SIZE 68 + +#define EMAC_DEF_TX_QUEUES 1 +#define EMAC_DEF_RX_QUEUES 1 + +#define EMAC_MIN_TX_DESCS 128 +#define EMAC_MIN_RX_DESCS 128 + +#define EMAC_MAX_TX_DESCS 16383 +#define EMAC_MAX_RX_DESCS 2047 + +#define EMAC_DEF_TX_DESCS 512 +#define EMAC_DEF_RX_DESCS 256 + +#define EMAC_DEF_RX_IRQ_MOD 250 +#define EMAC_DEF_TX_IRQ_MOD 250 + +#define EMAC_WATCHDOG_TIME (5 * HZ) + +/* by default check link every 4 seconds */ +#define EMAC_TRY_LINK_TIMEOUT (4 * HZ) + +/* emac_irq per-device (per-adapter) irq properties. + * @irq: irq number. + * @mask mask to use over status register. + */ +struct emac_irq { + unsigned int irq; + u32 mask; +}; + +/* The device's main data structure */ +struct emac_adapter { + struct net_device *netdev; + struct mii_bus *mii_bus; + struct phy_device *phydev; + + void __iomem *base; + void __iomem *csr; + + struct emac_phy phy; + struct emac_stats stats; + + struct emac_irq irq; + struct clk *clk[EMAC_CLK_CNT]; + + /* All Descriptor memory */ + struct emac_ring_header ring_header; + struct emac_tx_queue tx_q; + struct emac_rx_queue rx_q; + unsigned int tx_desc_cnt; + unsigned int rx_desc_cnt; + unsigned int rrd_size; /* in quad words */ + unsigned int rfd_size; /* in quad words */ + unsigned int tpd_size; /* in quad words */ + + unsigned int rxbuf_size; + + /* Ring parameter */ + u8 tpd_burst; + u8 rfd_burst; + unsigned int dmaw_dly_cnt; + unsigned int dmar_dly_cnt; + enum emac_dma_req_block dmar_block; + enum emac_dma_req_block dmaw_block; + enum emac_dma_order dma_order; + + u32 irq_mod; + u32 preamble; + + struct work_struct work_thread; + + u16 msg_enable; + + struct mutex reset_lock; +}; + +int emac_reinit_locked(struct emac_adapter *adpt); +void emac_reg_update32(void __iomem *addr, u32 mask, u32 val); +irqreturn_t emac_isr(int irq, void *data); + +#endif /* _EMAC_H_ */ -- cgit v1.2.3 From ba289af8020a6e81eac424e1d4ef3fcc8ff1b23d Mon Sep 17 00:00:00 2001 From: Roger Chen Date: Fri, 2 Sep 2016 01:49:59 +0800 Subject: net: stmmac: dwmac-rk: add rk3366 & rk3399 specific data Add constants and callback functions for the dwmac on rk3228/rk3229 socs. As can be seen, the base structure is the same, only registers and the bits in them moved slightly. Signed-off-by: Roger Chen Signed-off-by: Caesar Wang Reviewed-by: Heiko Stuebner Signed-off-by: David S. Miller --- .../devicetree/bindings/net/rockchip-dwmac.txt | 8 +- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 226 +++++++++++++++++++++ 2 files changed, 232 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt index cccd945fc45b..95383c5131fc 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt @@ -3,8 +3,12 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC) The device node has following properties. Required properties: - - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac", - "rockchip,rk3368-gmac" + - compatible: should be "rockchip,-gamc" + "rockchip,rk3228-gmac": found on RK322x SoCs + "rockchip,rk3288-gmac": found on RK3288 SoCs + "rockchip,rk3366-gmac": found on RK3366 SoCs + "rockchip,rk3368-gmac": found on RK3368 SoCs + "rockchip,rk3399-gmac": found on RK3399 SoCs - reg: addresses and length of the register sets for the device. - interrupts: Should contain the GMAC interrupts. - interrupt-names: Should contain the interrupt names "macirq". diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 92105916ef40..4e6a27088313 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -301,6 +301,118 @@ static const struct rk_gmac_ops rk3288_ops = { .set_rmii_speed = rk3288_set_rmii_speed, }; +#define RK3366_GRF_SOC_CON6 0x0418 +#define RK3366_GRF_SOC_CON7 0x041c + +/* RK3366_GRF_SOC_CON6 */ +#define RK3366_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3366_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3366_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3366_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3366_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3366_GMAC_SPEED_100M GRF_BIT(7) +#define RK3366_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3366_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3366_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3366_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_RMII_MODE GRF_BIT(6) +#define RK3366_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3366_GRF_SOC_CON7 */ +#define RK3366_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3366_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3366_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3366_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3366_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3366_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RGMII | + RK3366_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7, + RK3366_GMAC_RXCLK_DLY_ENABLE | + RK3366_GMAC_TXCLK_DLY_ENABLE | + RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3366_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE); +} + +static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_2_5M | + RK3366_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_25M | + RK3366_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3366_ops = { + .set_to_rgmii = rk3366_set_to_rgmii, + .set_to_rmii = rk3366_set_to_rmii, + .set_rgmii_speed = rk3366_set_rgmii_speed, + .set_rmii_speed = rk3366_set_rmii_speed, +}; + #define RK3368_GRF_SOC_CON15 0x043c #define RK3368_GRF_SOC_CON16 0x0440 @@ -413,6 +525,118 @@ static const struct rk_gmac_ops rk3368_ops = { .set_rmii_speed = rk3368_set_rmii_speed, }; +#define RK3399_GRF_SOC_CON5 0xc214 +#define RK3399_GRF_SOC_CON6 0xc218 + +/* RK3399_GRF_SOC_CON5 */ +#define RK3399_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3399_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3399_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3399_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3399_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3399_GMAC_SPEED_100M GRF_BIT(7) +#define RK3399_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3399_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3399_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3399_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_RMII_MODE GRF_BIT(6) +#define RK3399_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3399_GRF_SOC_CON6 */ +#define RK3399_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3399_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3399_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3399_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3399_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3399_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RGMII | + RK3399_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6, + RK3399_GMAC_RXCLK_DLY_ENABLE | + RK3399_GMAC_TXCLK_DLY_ENABLE | + RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3399_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE); +} + +static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_2_5M | + RK3399_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_25M | + RK3399_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3399_ops = { + .set_to_rgmii = rk3399_set_to_rgmii, + .set_to_rmii = rk3399_set_to_rmii, + .set_rgmii_speed = rk3399_set_rgmii_speed, + .set_rmii_speed = rk3399_set_rmii_speed, +}; + static int gmac_clk_init(struct rk_priv_data *bsp_priv) { struct device *dev = &bsp_priv->pdev->dev; @@ -760,7 +984,9 @@ static int rk_gmac_probe(struct platform_device *pdev) static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops }, { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops }, + { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops }, { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops }, + { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops }, { } }; MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); -- cgit v1.2.3 From 216559d9032704a7a5d2fcd3c9c086dd9f7cd557 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 7 Sep 2016 15:53:31 +0200 Subject: net: smsc911x: augment device tree bindings This adds device tree bindings for: - An optional GPIO line for releasing the RESET signal to the SMSC911x devices - An optional PME (power management event) interrupt line that can be utilized to wake up the system on network activity. This signal exist on all the SMSC911x devices, it is just not very often routed. Both these lines are routed to the SoC on the Qualcomm APQ8060 Dragonboard and thus needs to be bound in the device tree. Cc: devicetree@vger.kernel.org Cc: Jeremy Linton Signed-off-by: Linus Walleij Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/smsc911x.txt | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt index 3fed3c124411..16c3a9501f5d 100644 --- a/Documentation/devicetree/bindings/net/smsc911x.txt +++ b/Documentation/devicetree/bindings/net/smsc911x.txt @@ -3,9 +3,11 @@ Required properties: - compatible : Should be "smsc,lan", "smsc,lan9115" - reg : Address and length of the io space for SMSC LAN -- interrupts : Should contain SMSC LAN interrupt line -- interrupt-parent : Should be the phandle for the interrupt controller - that services interrupts for this device +- interrupts : one or two interrupt specifiers + - The first interrupt is the SMSC LAN interrupt line + - The second interrupt (if present) is the PME (power + management event) interrupt that is able to wake up the host + system with a 50ms pulse on network activity - phy-mode : See ethernet.txt file in the same directory Optional properties: @@ -21,6 +23,10 @@ Optional properties: external PHY - smsc,save-mac-address : Indicates that mac address needs to be saved before resetting the controller +- reset-gpios : a GPIO line connected to the RESET (active low) signal + of the device. On many systems this is wired high so the device goes + out of reset at power-on, but if it is under program control, this + optional GPIO can wake up in response to it. Examples: @@ -29,7 +35,8 @@ lan9220@f4000000 { reg = <0xf4000000 0x2000000>; phy-mode = "mii"; interrupt-parent = <&gpio1>; - interrupts = <31>; + interrupts = <31>, <32>; + reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; reg-io-width = <4>; smsc,irq-push-pull; }; -- cgit v1.2.3 From e5dcad290a7c62d1c856269dbd13e470e388b704 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:39 +0200 Subject: Documentation: devicetree: add qca8k binding Add device-tree binding for ar8xxx switch families. Cc: devicetree@vger.kernel.org Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/qca8k.txt | 89 ++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/dsa/qca8k.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt new file mode 100644 index 000000000000..9c67ee4890d7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -0,0 +1,89 @@ +* Qualcomm Atheros QCA8xxx switch family + +Required properties: + +- compatible: should be "qca,qca8337" +- #size-cells: must be 0 +- #address-cells: must be 1 + +Subnodes: + +The integrated switch subnode should be specified according to the binding +described in dsa/dsa.txt. As the QCA8K switches do not have a N:N mapping of +port and PHY id, each subnode describing a port needs to have a valid phandle +referencing the internal PHY connected to it. The CPU port of this switch is +always port 0. + +Example: + + + &mdio0 { + phy_port1: phy@0 { + reg = <0>; + }; + + phy_port2: phy@1 { + reg = <1>; + }; + + phy_port3: phy@2 { + reg = <2>; + }; + + phy_port4: phy@3 { + reg = <3>; + }; + + phy_port5: phy@4 { + reg = <4>; + }; + + switch0@0 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&phy_port1>; + }; + + port@2 { + reg = <2>; + label = "lan2"; + phy-handle = <&phy_port2>; + }; + + port@3 { + reg = <3>; + label = "lan3"; + phy-handle = <&phy_port3>; + }; + + port@4 { + reg = <4>; + label = "lan4"; + phy-handle = <&phy_port4>; + }; + + port@5 { + reg = <5>; + label = "wan"; + phy-handle = <&phy_port5>; + }; + }; + }; + }; -- cgit v1.2.3 From 4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:19 -0700 Subject: ipvlan: Introduce l3s mode In a typical IPvlan L3 setup where master is in default-ns and each slave is into different (slave) ns. In this setup egress packet processing for traffic originating from slave-ns will hit all NF_HOOKs in slave-ns as well as default-ns. However same is not true for ingress processing. All these NF_HOOKs are hit only in the slave-ns skipping them in the default-ns. IPvlan in L3 mode is restrictive and if admins want to deploy iptables rules in default-ns, this asymmetric data path makes it impossible to do so. This patch makes use of the l3_rcv() (added as part of l3mdev enhancements) to perform input route lookup on RX packets without changing the skb->dev and then uses nf_hook at NF_INET_LOCAL_IN to change the skb->dev just before handing over skb to L4. Signed-off-by: Mahesh Bandewar CC: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ipvlan.txt | 7 ++- drivers/net/Kconfig | 1 + drivers/net/ipvlan/ipvlan.h | 6 +++ drivers/net/ipvlan/ipvlan_core.c | 94 +++++++++++++++++++++++++++++++++++++ drivers/net/ipvlan/ipvlan_main.c | 87 +++++++++++++++++++++++++++++++--- include/uapi/linux/if_link.h | 1 + 6 files changed, 188 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 14422f8fcdc4..24196cef7c91 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module There are no module parameters for this driver and it can be configured using IProute2/ip utility. - ip link add link type ipvlan mode { l2 | L3 } + ip link add link type ipvlan mode { l2 | l3 | l3s } e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 @@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be used before packets are queued on the outbound device. In this mode the slaves will not receive nor can send multicast / broadcast traffic. +4.3 L3S mode: + This is very similar to the L3 mode except that iptables (conn-tracking) +works in this mode and hence it is L3-symmetric (L3s). This will have slightly less +performance but that shouldn't matter since you are choosing this mode over plain-L3 +mode to make conn-tracking work. 5. What to choose (macvlan vs. ipvlan)? These two devices are very similar in many regards and the specific use diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0c5415b05ea9..8768a625350d 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 695a5dc9ace3..7e0732f5ea07 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include #define IPVLAN_DRV "ipvlan" #define IPV_DRV_VER "0.1" @@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto); +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b5f9511d819e..b4e990743e1d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: + case IPVLAN_MODE_L3S: return ipvlan_xmit_mode_l3(skb, dev); } @@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); + case IPVLAN_MODE_L3S: + return RX_HANDLER_PASS; } /* Should not reach here */ @@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + void *lyr3h; + int addr_type; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + int err; + struct iphdr *ip4h = ip_hdr(skb); + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + skb_dst_set(skb, dst); + break; + } + default: + break; + } + +out: + return skb; +} + +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 18b4e8c7f68a..f442eb366863 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,24 +9,87 @@ #include "ipvlan.h" +static u32 ipvl_nf_hook_refcnt = 0; + +static struct nf_hook_ops ipvl_nfops[] __read_mostly = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +}; + +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; } -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) +static int ipvlan_register_nf_hook(void) +{ + int err = 0; + + if (!ipvl_nf_hook_refcnt) { + err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); + if (!err) + ipvl_nf_hook_refcnt = 1; + } else { + ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(void) +{ + WARN_ON(!ipvl_nf_hook_refcnt); + + ipvl_nf_hook_refcnt--; + if (!ipvl_nf_hook_refcnt) + _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); +} + +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; + struct net_device *mdev = port->dev; + int err = 0; + ASSERT_RTNL(); if (port->mode != nval) { + if (nval == IPVLAN_MODE_L3S) { + /* New mode is L3S */ + err = ipvlan_register_nf_hook(); + if (!err) { + mdev->l3mdev_ops = &ipvl_l3mdev_ops; + mdev->priv_flags |= IFF_L3MDEV_MASTER; + } else + return err; + } else if (port->mode == IPVLAN_MODE_L3S) { + /* Old mode was L3S */ + mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + mdev->l3mdev_ops = NULL; + } list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3) + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) ipvlan->dev->flags |= IFF_NOARP; else ipvlan->dev->flags &= ~IFF_NOARP; } port->mode = nval; } + return err; } static int ipvlan_port_create(struct net_device *dev) @@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); dev->priv_flags &= ~IFF_IPVLAN_MASTER; + if (port->mode == IPVLAN_MODE_L3S) { + dev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + dev->l3mdev_ops = NULL; + } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); __skb_queue_purge(&port->backlog); @@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev) struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; - if (ipvlan->port->mode == IPVLAN_MODE_L3) + if (ipvlan->port->mode == IPVLAN_MODE_L3 || + ipvlan->port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; @@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev, { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int err = 0; if (data && data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); - ipvlan_set_port_mode(port, nmode); + err = ipvlan_set_port_mode(port, nmode); } - return 0; + return err; } static size_t ipvlan_nl_getsize(const struct net_device *dev) @@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, unregister_netdevice(dev); return err; } + err = ipvlan_set_port_mode(port, mode); + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); - ipvlan_set_port_mode(port, mode); - netif_stacked_transfer_operstate(phy_dev, dev); return 0; } diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2351776a724f..7ec9e99d5491 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -464,6 +464,7 @@ enum { enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, IPVLAN_MODE_MAX }; -- cgit v1.2.3 From 004e6cc6c181aa109427594fca35eb55d89d780e Mon Sep 17 00:00:00 2001 From: Nelson Chang Date: Sat, 17 Sep 2016 23:50:57 +0800 Subject: net: ethernet: mediatek: add the dts property to set if the HW supports LRO Add the dts property for the capability if the hardware supports LRO. Signed-off-by: Nelson Chang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 32eaaca04d9b..6103e5583070 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -24,7 +24,7 @@ Required properties: Optional properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - +- mediatek,hwlro: the capability if the hardware supports LRO functions * Ethernet MAC node @@ -51,6 +51,7 @@ eth: ethernet@1b100000 { reset-names = "eth"; mediatek,ethsys = <ðsys>; mediatek,pctl = <&syscfg_pctl_a>; + mediatek,hwlro; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3 From b88539658a597e649627c4cc2d446456fc01e104 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:56 +0800 Subject: net: ethernet: mediatek: add the dts property to set if TRGMII supported on GMAC0 Add the dts property for the capability if TRGMII supported on GAMC0 Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 6103e5583070..7111278adc7e 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -31,7 +31,10 @@ Optional properties: Required properties: - compatible: Should be "mediatek,eth-mac" - reg: The number of the MAC -- phy-handle: see ethernet.txt file in the same directory. +- phy-handle: see ethernet.txt file in the same directory and + the phy-mode "trgmii" required being provided when reg + is equal to 0 and the MAC uses fixed-link to connect + with inernal switch such as MT7530. Example: -- cgit v1.2.3 From 4ce4862a815e3cee8040c9d91e2148aecbbf056e Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:04:09 +0800 Subject: Documentation: devicetree: revise ethernet device-tree binding about TRGMII add phy-mode "trgmii" to Documentation/devicetree/bindings/net/ethernet.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 5d88f37480b6..e1d76812419c 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -11,8 +11,8 @@ The following properties are common to the Ethernet controllers: the maximum frame size (there's contradiction in ePAPR). - phy-mode: string, operation mode of the PHY interface; supported values are "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id", - "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto - standard property; + "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii", "trgmii"; this is now a + de-facto standard property; - phy-connection-type: the same as "phy-mode" property but described in ePAPR; - phy-handle: phandle, specifies a reference to a node representing a PHY device; this property is described in ePAPR and so preferred; -- cgit v1.2.3 From 7f8c2865a94a73308386627cd7556c17f03efb63 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 23 Sep 2016 14:09:32 +0800 Subject: Documentation: devicetree: fix typo in MediaTek ethernet device-tree binding fix typo in Documentation/devicetree/bindings/net/mediatek-net.txt Cc: devicetree@vger.kernel.org Reported-by: Sergei Shtylyov Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mediatek-net.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 7111278adc7e..f09525772369 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -34,7 +34,7 @@ Required properties: - phy-handle: see ethernet.txt file in the same directory and the phy-mode "trgmii" required being provided when reg is equal to 0 and the MAC uses fixed-link to connect - with inernal switch such as MT7530. + with internal switch such as MT7530. Example: -- cgit v1.2.3 From fd41b0eaa06a8a0516f9e0b0a5889035bf423784 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:34 +0200 Subject: doc: update switchdev L3 section This is to reflect the change of FIB offload infrastructure from switchdev objects to FIB notifier. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 44235e83799b..2bbac05ab9e2 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -314,30 +314,29 @@ the kernel, with the device doing the FIB lookup and forwarding. The device does a longest prefix match (LPM) on FIB entries matching route prefix and forwards the packet to the matching FIB entry's nexthop(s) egress ports. -To program the device, the driver implements support for -SWITCHDEV_OBJ_IPV[4|6]_FIB object using switchdev_port_obj_xxx ops. -switchdev_port_obj_add is used for both adding a new FIB entry to the device, -or modifying an existing entry on the device. +To program the device, the driver has to register a FIB notifier handler +using register_fib_notifier. The following events are available: +FIB_EVENT_ENTRY_ADD: used for both adding a new FIB entry to the device, + or modifying an existing entry on the device. +FIB_EVENT_ENTRY_DEL: used for removing a FIB entry +FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL: used to propagate FIB rule changes -XXX: Currently, only SWITCHDEV_OBJ_ID_IPV4_FIB objects are supported. +FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass: -SWITCHDEV_OBJ_ID_IPV4_FIB object passes: - - struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ u32 dst; int dst_len; struct fib_info *fi; u8 tos; u8 type; - u32 nlflags; u32 tb_id; - } ipv4_fib; + u32 nlflags; + }; to add/modify/delete IPv4 dst/dest_len prefix on table tb_id. The *fi structure holds details on the route and route's nexthops. *dev is one of the -port netdevs mentioned in the routes next hop list. If the output port netdevs -referenced in the route's nexthop list don't all have the same switch ID, the -driver is not called to add/modify/delete the FIB entry. +port netdevs mentioned in the route's next hop list. Routes offloaded to the device are labeled with "offload" in the ip route listing: @@ -355,6 +354,8 @@ listing: 12.0.0.4 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.15 +The "offload" flag is set in case at least one device offloads the FIB entry. + XXX: add/mod/del IPv6 FIB API Nexthop Resolution -- cgit v1.2.3 From c099ff3cdb9dd2281dee3dc4ef89fec1c516df22 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 27 Sep 2016 01:23:26 +0300 Subject: sh_eth: add R8A7743/5 support Add support for the first two members of the Renesas RZ/G family, RZ/G1M/E (also known as R8A7743/5). The Ether core is the same as in the R-Car gen2 SoCs, so will share the code/data with them... Signed-off-by: Sergei Shtylyov Acked-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/sh_eth.txt | 2 ++ drivers/net/ethernet/renesas/Kconfig | 2 +- drivers/net/ethernet/renesas/sh_eth.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index 2f6ec85fda8e..0115c85a2425 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -5,6 +5,8 @@ interface contains. Required properties: - compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC. + "renesas,ether-r8a7743" if the device is a part of R8A7743 SoC. + "renesas,ether-r8a7745" if the device is a part of R8A7745 SoC. "renesas,ether-r8a7778" if the device is a part of R8A7778 SoC. "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC. "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC. diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 4f132cf177cd..85ec447c2d18 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -27,7 +27,7 @@ config SH_ETH Renesas SuperH Ethernet device driver. This driver supporting CPUs are: - SH7619, SH7710, SH7712, SH7724, SH7734, SH7763, SH7757, - R8A7740, R8A777x and R8A779x. + R8A7740, R8A774x, R8A777x and R8A779x. config RAVB tristate "Renesas Ethernet AVB support" diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 440ae272161c..05b0dc55de77 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2959,6 +2959,8 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data }, + { .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data }, + { .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data }, { .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data }, -- cgit v1.2.3 From a4cc96d1f0170b779c32c6b2cc58764f5d2cdef0 Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Mon, 3 Oct 2016 12:53:13 +0530 Subject: net: phy: Add Edge-rate driver for Microsemi PHYs. Edge-rate: As system and networking speeds increase, a signal's output transition, also know as the edge rate or slew rate (V/ns), takes on greater importance because high-speed signals come with a price. That price is an assortment of interference problems like ringing on the line, signal overshoot and undershoot, extended signal settling times, crosstalk noise, transmission line reflections, false signal detection by the receiving device and electromagnetic interference (EMI) -- all of which can negate the potential gains designers are seeking when they try to increase system speeds through the use of higher performance logic devices. The fact is, faster signaling edge rates can cause a higher level of electrical noise or other type of interference that can actually lead to slower line speeds and lower maximum system frequencies. This parameter allow the board designers to change the driving strange, and thereby change the EMI behavioral. Edge-rate parameters (vddmac, edge-slowdown) get from Device Tree. Tested on Beaglebone Black with VSC 8531 PHY. Signed-off-by: Raju Lakkaraju Signed-off-by: David S. Miller --- .../devicetree/bindings/net/mscc-phy-vsc8531.txt | 58 ++++++++++ drivers/net/phy/mscc.c | 125 +++++++++++++++++++++ include/dt-bindings/net/mscc-phy-vsc8531.h | 21 ++++ 3 files changed, 204 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt create mode 100644 include/dt-bindings/net/mscc-phy-vsc8531.h (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt new file mode 100644 index 000000000000..99c7eb0a00c8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt @@ -0,0 +1,58 @@ +* Microsemi - vsc8531 Giga bit ethernet phy + +Required properties: +- compatible : Should contain phy id as "ethernet-phy-idAAAA.BBBB" + The PHY device uses the binding described in + Documentation/devicetree/bindings/net/phy.txt + +Optional properties: +- vsc8531,vddmac : The vddmac in mV. +- vsc8531,edge-slowdown : % the edge should be slowed down relative to + the fastest possible edge time. Native sign + need not enter. + Edge rate sets the drive strength of the MAC + interface output signals. Changing the drive + strength will affect the edge rate of the output + signal. The goal of this setting is to help + reduce electrical emission (EMI) by being able + to reprogram drive strength and in effect slow + down the edge rate if desired. Table 1 shows the + impact to the edge rate per VDDMAC supply for each + drive strength setting. + Ref: Table:1 - Edge rate change below. + +Note: see dt-bindings/net/mscc-phy-vsc8531.h for applicable values + +Table: 1 - Edge rate change +----------------------------------------------------------------| +| Edge Rate Change (VDDMAC) | +| | +| 3300 mV 2500 mV 1800 mV 1500 mV | +|---------------------------------------------------------------| +| Default Deafult Default Default | +| (Fastest) (recommended) (recommended) | +|---------------------------------------------------------------| +| -2% -3% -5% -6% | +|---------------------------------------------------------------| +| -4% -6% -9% -14% | +|---------------------------------------------------------------| +| -7% -10% -16% -21% | +|(recommended) (recommended) | +|---------------------------------------------------------------| +| -10% -14% -23% -29% | +|---------------------------------------------------------------| +| -17% -23% -35% -42% | +|---------------------------------------------------------------| +| -29% -37% -52% -58% | +|---------------------------------------------------------------| +| -53% -63% -76% -77% | +| (slowest) | +|---------------------------------------------------------------| + +Example: + + vsc8531_0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0570"; + vsc8531,vddmac = <3300>; + vsc8531,edge-slowdown = <21>; + }; diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index d350debd174a..a17573e3bd8a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include enum rgmii_rx_clock_delay { RGMII_RX_CLK_DELAY_0_2_NS = 0, @@ -37,6 +39,10 @@ enum rgmii_rx_clock_delay { #define MII_VSC85XX_INT_MASK_MASK 0xa000 #define MII_VSC85XX_INT_STATUS 26 +#define MSCC_PHY_WOL_MAC_CONTROL 27 +#define EDGE_RATE_CNTL_POS 5 +#define EDGE_RATE_CNTL_MASK 0x00E0 + #define MSCC_EXT_PAGE_ACCESS 31 #define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ #define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ @@ -50,6 +56,23 @@ enum rgmii_rx_clock_delay { #define PHY_ID_VSC8531 0x00070570 #define PHY_ID_VSC8541 0x00070770 +struct edge_rate_table { + u16 vddmac; + int slowdown[MSCC_SLOWDOWN_MAX]; +}; + +struct edge_rate_table edge_table[MSCC_VDDMAC_MAX] = { + {3300, { 0, -2, -4, -7, -10, -17, -29, -53} }, + {2500, { 0, -3, -6, -10, -14, -23, -37, -63} }, + {1800, { 0, -5, -9, -16, -23, -35, -52, -76} }, + {1500, { 0, -6, -14, -21, -29, -42, -58, -77} }, +}; + +struct vsc8531_private { + u8 edge_slowdown; + u16 vddmac; +}; + static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) { int rc; @@ -58,6 +81,51 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) return rc; } +static u8 edge_rate_magic_get(u16 vddmac, + int slowdown) +{ + int rc = (MSCC_SLOWDOWN_MAX - 1); + u8 vdd; + u8 sd; + + for (vdd = 0; vdd < MSCC_VDDMAC_MAX; vdd++) { + if (edge_table[vdd].vddmac == vddmac) { + for (sd = 0; sd < MSCC_SLOWDOWN_MAX; sd++) { + if (edge_table[vdd].slowdown[sd] <= slowdown) { + rc = (MSCC_SLOWDOWN_MAX - sd - 1); + break; + } + } + } + } + + return rc; +} + +static int vsc85xx_edge_rate_cntl_set(struct phy_device *phydev, + u8 edge_rate) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + reg_val = phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL); + reg_val &= ~(EDGE_RATE_CNTL_MASK); + reg_val |= (edge_rate << EDGE_RATE_CNTL_POS); + rc = phy_write(phydev, MSCC_PHY_WOL_MAC_CONTROL, reg_val); + if (rc != 0) + goto out_unlock; + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + static int vsc85xx_mac_if_set(struct phy_device *phydev, phy_interface_t interface) { @@ -116,9 +184,45 @@ out_unlock: return rc; } +#ifdef CONFIG_OF_MDIO +static int vsc8531_of_init(struct phy_device *phydev) +{ + int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + + if (!of_node) + return -ENODEV; + + rc = of_property_read_u16(of_node, "vsc8531,vddmac", + &vsc8531->vddmac); + if (rc == -EINVAL) + vsc8531->vddmac = MSCC_VDDMAC_3300; + rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", + &vsc8531->edge_slowdown); + if (rc == -EINVAL) + vsc8531->edge_slowdown = 0; + + rc = 0; + return rc; +} +#else +static int vsc8531_of_init(struct phy_device *phydev) +{ + return 0; +} +#endif /* CONFIG_OF_MDIO */ + static int vsc85xx_config_init(struct phy_device *phydev) { int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + u8 edge_rate; + + rc = vsc8531_of_init(phydev); + if (rc) + return rc; rc = vsc85xx_default_config(phydev); if (rc) @@ -128,6 +232,12 @@ static int vsc85xx_config_init(struct phy_device *phydev) if (rc) return rc; + edge_rate = edge_rate_magic_get(vsc8531->vddmac, + -(int)vsc8531->edge_slowdown); + rc = vsc85xx_edge_rate_cntl_set(phydev, edge_rate); + if (rc) + return rc; + rc = genphy_config_init(phydev); return rc; @@ -160,6 +270,19 @@ static int vsc85xx_config_intr(struct phy_device *phydev) return rc; } +static int vsc85xx_probe(struct phy_device *phydev) +{ + struct vsc8531_private *vsc8531; + + vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); + if (!vsc8531) + return -ENOMEM; + + phydev->priv = vsc8531; + + return 0; +} + /* Microsemi VSC85xx PHYs */ static struct phy_driver vsc85xx_driver[] = { { @@ -177,6 +300,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .probe = &vsc85xx_probe, }, { .phy_id = PHY_ID_VSC8541, @@ -193,6 +317,7 @@ static struct phy_driver vsc85xx_driver[] = { .config_intr = &vsc85xx_config_intr, .suspend = &genphy_suspend, .resume = &genphy_resume, + .probe = &vsc85xx_probe, } }; diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h new file mode 100644 index 000000000000..2383dd20ff43 --- /dev/null +++ b/include/dt-bindings/net/mscc-phy-vsc8531.h @@ -0,0 +1,21 @@ +/* + * Device Tree constants for Microsemi VSC8531 PHY + * + * Author: Nagaraju Lakkaraju + * + * License: Dual MIT/GPL + * Copyright (c) 2016 Microsemi Corporation + */ + +#ifndef _DT_BINDINGS_MSCC_VSC8531_H +#define _DT_BINDINGS_MSCC_VSC8531_H + +/* MAC interface Edge rate control VDDMAC in milli Volts */ +#define MSCC_VDDMAC_3300 3300 +#define MSCC_VDDMAC_2500 2500 +#define MSCC_VDDMAC_1800 1800 +#define MSCC_VDDMAC_1500 1500 +#define MSCC_VDDMAC_MAX 4 +#define MSCC_SLOWDOWN_MAX 8 + +#endif -- cgit v1.2.3